/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3756 - (hide annotations)
Fri Jan 6 02:35:19 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 377442 byte(s)
Fixed interpolation from DOF to nodes and moved common code to the base class.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16 caltinay 3754 #include "paso/SystemMatrix.h"
17 caltinay 3691 }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50 caltinay 3753 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51     throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52 caltinay 3691
53 caltinay 3753 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54     throw RipleyException("Too few elements for the number of ranks");
55    
56     // local number of elements (including overlap)
57     m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58     if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59     m_NE0++;
60     m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62     m_NE1++;
63     m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64     if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65     m_NE2++;
66    
67     // local number of nodes
68 caltinay 3691 m_N0 = m_NE0+1;
69     m_N1 = m_NE1+1;
70     m_N2 = m_NE2+1;
71 caltinay 3753
72 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 caltinay 3753 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74     if (m_offset0 > 0)
75     m_offset0--;
76     m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77     if (m_offset1 > 0)
78     m_offset1--;
79     m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80     if (m_offset2 > 0)
81     m_offset2--;
82    
83 caltinay 3691 populateSampleIds();
84 caltinay 3756 createPattern();
85 caltinay 3691 }
86    
87    
88     Brick::~Brick()
89     {
90     }
91    
92     string Brick::getDescription() const
93     {
94     return "ripley::Brick";
95     }
96    
97     bool Brick::operator==(const AbstractDomain& other) const
98     {
99 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
100     if (o) {
101     return (RipleyDomain::operator==(other) &&
102     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103     && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104     && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105     }
106 caltinay 3691
107     return false;
108     }
109    
110     void Brick::dump(const string& fileName) const
111     {
112     #if USE_SILO
113     string fn(fileName);
114     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115     fn+=".silo";
116     }
117    
118     const int NUM_SILO_FILES = 1;
119     const char* blockDirFmt = "/block%04d";
120     int driver=DB_HDF5;
121     string siloPath;
122     DBfile* dbfile = NULL;
123    
124     #ifdef ESYS_MPI
125     PMPIO_baton_t* baton = NULL;
126     #endif
127    
128     if (m_mpiInfo->size > 1) {
129     #ifdef ESYS_MPI
130     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132     PMPIO_DefaultClose, (void*)&driver);
133     // try the fallback driver in case of error
134     if (!baton && driver != DB_PDB) {
135     driver = DB_PDB;
136     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138     PMPIO_DefaultClose, (void*)&driver);
139     }
140     if (baton) {
141     char str[64];
142     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143     siloPath = str;
144     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145     }
146     #endif
147     } else {
148     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149     getDescription().c_str(), driver);
150     // try the fallback driver in case of error
151     if (!dbfile && driver != DB_PDB) {
152     driver = DB_PDB;
153     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154     getDescription().c_str(), driver);
155     }
156     }
157    
158     if (!dbfile)
159     throw RipleyException("dump: Could not create Silo file");
160    
161     /*
162     if (driver==DB_HDF5) {
163     // gzip level 1 already provides good compression with minimal
164     // performance penalty. Some tests showed that gzip levels >3 performed
165     // rather badly on escript data both in terms of time and space
166     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167     }
168     */
169    
170     boost::scoped_ptr<double> x(new double[m_N0]);
171     boost::scoped_ptr<double> y(new double[m_N1]);
172     boost::scoped_ptr<double> z(new double[m_N2]);
173     double* coords[3] = { x.get(), y.get(), z.get() };
174 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175     pair<double,double> ydy = getFirstCoordAndSpacing(1);
176     pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 caltinay 3691 #pragma omp parallel
178     {
179     #pragma omp for
180     for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
182 caltinay 3691 }
183     #pragma omp for
184     for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
186 caltinay 3691 }
187     #pragma omp for
188     for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
190 caltinay 3691 }
191     }
192 caltinay 3698 IndexVector dims = getNumNodesPerDim();
193     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 caltinay 3691 DB_COLLINEAR, NULL);
195    
196 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197     NULL, 0, DB_INT, DB_NODECENT, NULL);
198 caltinay 3691
199 caltinay 3698 // write element ids
200     dims = getNumElementsPerDim();
201     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203    
204     // rank 0 writes multimesh and multivar
205 caltinay 3691 if (m_mpiInfo->rank == 0) {
206     vector<string> tempstrings;
207     vector<char*> names;
208     for (dim_t i=0; i<m_mpiInfo->size; i++) {
209     stringstream path;
210     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211     tempstrings.push_back(path.str());
212     names.push_back((char*)tempstrings.back().c_str());
213     }
214     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215     DBSetDir(dbfile, "/");
216     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217     &types[0], NULL);
218     tempstrings.clear();
219     names.clear();
220     for (dim_t i=0; i<m_mpiInfo->size; i++) {
221     stringstream path;
222     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223     tempstrings.push_back(path.str());
224     names.push_back((char*)tempstrings.back().c_str());
225     }
226     types.assign(m_mpiInfo->size, DB_QUADVAR);
227     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228     &types[0], NULL);
229 caltinay 3698 tempstrings.clear();
230     names.clear();
231     for (dim_t i=0; i<m_mpiInfo->size; i++) {
232     stringstream path;
233     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234     tempstrings.push_back(path.str());
235     names.push_back((char*)tempstrings.back().c_str());
236     }
237     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238     &types[0], NULL);
239 caltinay 3691 }
240    
241     if (m_mpiInfo->size > 1) {
242     #ifdef ESYS_MPI
243     PMPIO_HandOffBaton(baton, dbfile);
244     PMPIO_Finish(baton);
245     #endif
246     } else {
247     DBClose(dbfile);
248     }
249    
250     #else // USE_SILO
251     throw RipleyException("dump(): no Silo support");
252     #endif
253     }
254    
255     const int* Brick::borrowSampleReferenceIDs(int fsType) const
256     {
257 caltinay 3697 switch (fsType) {
258     case Nodes:
259 caltinay 3748 case ReducedNodes: //FIXME: reduced
260 caltinay 3753 return &m_nodeId[0];
261 caltinay 3750 case DegreesOfFreedom: //FIXME
262     case ReducedDegreesOfFreedom: //FIXME
263 caltinay 3753 return &m_dofId[0];
264 caltinay 3697 case Elements:
265 caltinay 3733 case ReducedElements:
266 caltinay 3697 return &m_elementId[0];
267 caltinay 3733 case ReducedFaceElements:
268 caltinay 3697 return &m_faceId[0];
269     default:
270     break;
271     }
272 caltinay 3691
273 caltinay 3697 stringstream msg;
274     msg << "borrowSampleReferenceIDs() not implemented for function space type "
275     << fsType;
276     throw RipleyException(msg.str());
277 caltinay 3691 }
278    
279     bool Brick::ownSample(int fsCode, index_t id) const
280     {
281     #ifdef ESYS_MPI
282     if (fsCode == Nodes) {
283 caltinay 3756 return (m_dofMap[id] < getNumDOF());
284 caltinay 3753 } else {
285     stringstream msg;
286     msg << "ownSample() not implemented for "
287     << functionSpaceTypeAsString(fsCode);
288     throw RipleyException(msg.str());
289     }
290 caltinay 3691 #else
291     return true;
292     #endif
293     }
294    
295 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
296     {
297     escript::Data& in = *const_cast<escript::Data*>(&cIn);
298     const dim_t numComp = in.getDataPointSize();
299     const double h0 = m_l0/m_gNE0;
300     const double h1 = m_l1/m_gNE1;
301     const double h2 = m_l1/m_gNE2;
302 caltinay 3731 const double C0 = .044658198738520451079;
303     const double C1 = .16666666666666666667;
304     const double C2 = .21132486540518711775;
305     const double C3 = .25;
306     const double C4 = .5;
307     const double C5 = .62200846792814621559;
308     const double C6 = .78867513459481288225;
309    
310 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
311 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
312 caltinay 3703 #pragma omp parallel for
313 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
314     for (index_t k1=0; k1 < m_NE1; ++k1) {
315     for (index_t k0=0; k0 < m_NE0; ++k0) {
316 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
317     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
318     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
319     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
320     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
321     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
322     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
323     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
324     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
325     for (index_t i=0; i < numComp; ++i) {
326 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
327     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
328     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
329     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
330     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
331     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
332     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
333     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
334     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
335     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
336     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
337     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
338     o[INDEX3(i,0,0,numComp,3)] = V0;
339     o[INDEX3(i,1,0,numComp,3)] = V4;
340     o[INDEX3(i,2,0,numComp,3)] = V8;
341     o[INDEX3(i,0,1,numComp,3)] = V0;
342     o[INDEX3(i,1,1,numComp,3)] = V5;
343     o[INDEX3(i,2,1,numComp,3)] = V9;
344     o[INDEX3(i,0,2,numComp,3)] = V1;
345     o[INDEX3(i,1,2,numComp,3)] = V4;
346     o[INDEX3(i,2,2,numComp,3)] = V10;
347     o[INDEX3(i,0,3,numComp,3)] = V1;
348     o[INDEX3(i,1,3,numComp,3)] = V5;
349     o[INDEX3(i,2,3,numComp,3)] = V11;
350     o[INDEX3(i,0,4,numComp,3)] = V2;
351     o[INDEX3(i,1,4,numComp,3)] = V6;
352     o[INDEX3(i,2,4,numComp,3)] = V8;
353     o[INDEX3(i,0,5,numComp,3)] = V2;
354     o[INDEX3(i,1,5,numComp,3)] = V7;
355     o[INDEX3(i,2,5,numComp,3)] = V9;
356     o[INDEX3(i,0,6,numComp,3)] = V3;
357     o[INDEX3(i,1,6,numComp,3)] = V6;
358     o[INDEX3(i,2,6,numComp,3)] = V10;
359     o[INDEX3(i,0,7,numComp,3)] = V3;
360     o[INDEX3(i,1,7,numComp,3)] = V7;
361     o[INDEX3(i,2,7,numComp,3)] = V11;
362 caltinay 3703 } /* end of component loop i */
363     } /* end of k0 loop */
364     } /* end of k1 loop */
365     } /* end of k2 loop */
366     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
367 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
368 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
369 caltinay 3711 #pragma omp parallel for
370     for (index_t k2=0; k2 < m_NE2; ++k2) {
371     for (index_t k1=0; k1 < m_NE1; ++k1) {
372     for (index_t k0=0; k0 < m_NE0; ++k0) {
373     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
374     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
375     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
376     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
377     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
378     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
379     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
380     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
381     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
382     for (index_t i=0; i < numComp; ++i) {
383 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
384     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
385     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
386 caltinay 3711 } /* end of component loop i */
387     } /* end of k0 loop */
388     } /* end of k1 loop */
389     } /* end of k2 loop */
390     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
391 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
392 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
393 caltinay 3722 #pragma omp parallel
394     {
395     if (m_faceOffset[0] > -1) {
396     #pragma omp for nowait
397     for (index_t k2=0; k2 < m_NE2; ++k2) {
398     for (index_t k1=0; k1 < m_NE1; ++k1) {
399     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
400     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
401     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
402     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
403     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
404     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
405     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
406     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
407     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
408     for (index_t i=0; i < numComp; ++i) {
409 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
410     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
411     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
412     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
413     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
414     o[INDEX3(i,1,0,numComp,3)] = V0;
415     o[INDEX3(i,2,0,numComp,3)] = V2;
416     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
417     o[INDEX3(i,1,1,numComp,3)] = V0;
418     o[INDEX3(i,2,1,numComp,3)] = V3;
419     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
420     o[INDEX3(i,1,2,numComp,3)] = V1;
421     o[INDEX3(i,2,2,numComp,3)] = V2;
422     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423     o[INDEX3(i,1,3,numComp,3)] = V1;
424     o[INDEX3(i,2,3,numComp,3)] = V3;
425 caltinay 3722 } /* end of component loop i */
426     } /* end of k1 loop */
427     } /* end of k2 loop */
428     } /* end of face 0 */
429     if (m_faceOffset[1] > -1) {
430     #pragma omp for nowait
431     for (index_t k2=0; k2 < m_NE2; ++k2) {
432     for (index_t k1=0; k1 < m_NE1; ++k1) {
433     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
434     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
435     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
436     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
437     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
438     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
439     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
440     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
441     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
442     for (index_t i=0; i < numComp; ++i) {
443 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
444     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
445     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
446     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
447     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
448     o[INDEX3(i,1,0,numComp,3)] = V0;
449     o[INDEX3(i,2,0,numComp,3)] = V2;
450     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
451     o[INDEX3(i,1,1,numComp,3)] = V0;
452     o[INDEX3(i,2,1,numComp,3)] = V3;
453     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
454     o[INDEX3(i,1,2,numComp,3)] = V1;
455     o[INDEX3(i,2,2,numComp,3)] = V2;
456     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457     o[INDEX3(i,1,3,numComp,3)] = V1;
458     o[INDEX3(i,2,3,numComp,3)] = V3;
459 caltinay 3722 } /* end of component loop i */
460     } /* end of k1 loop */
461     } /* end of k2 loop */
462     } /* end of face 1 */
463     if (m_faceOffset[2] > -1) {
464     #pragma omp for nowait
465     for (index_t k2=0; k2 < m_NE2; ++k2) {
466     for (index_t k0=0; k0 < m_NE0; ++k0) {
467     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
468     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
469     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
470     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
471     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
472     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
473     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
474     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
475     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
476     for (index_t i=0; i < numComp; ++i) {
477 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
478     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
479     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
480     o[INDEX3(i,0,0,numComp,3)] = V0;
481     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
482     o[INDEX3(i,2,0,numComp,3)] = V1;
483     o[INDEX3(i,0,1,numComp,3)] = V0;
484     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
485     o[INDEX3(i,2,1,numComp,3)] = V2;
486     o[INDEX3(i,0,2,numComp,3)] = V0;
487     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
488     o[INDEX3(i,2,2,numComp,3)] = V1;
489     o[INDEX3(i,0,3,numComp,3)] = V0;
490     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491     o[INDEX3(i,2,3,numComp,3)] = V2;
492 caltinay 3722 } /* end of component loop i */
493     } /* end of k0 loop */
494     } /* end of k2 loop */
495     } /* end of face 2 */
496     if (m_faceOffset[3] > -1) {
497     #pragma omp for nowait
498     for (index_t k2=0; k2 < m_NE2; ++k2) {
499     for (index_t k0=0; k0 < m_NE0; ++k0) {
500     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
501     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
502     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
503     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
504     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
505     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
506     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
507     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
508     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
509     for (index_t i=0; i < numComp; ++i) {
510 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
511     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
512     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
513     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
514     o[INDEX3(i,0,0,numComp,3)] = V0;
515     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
516     o[INDEX3(i,2,0,numComp,3)] = V2;
517     o[INDEX3(i,0,1,numComp,3)] = V0;
518     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
519     o[INDEX3(i,2,1,numComp,3)] = V3;
520     o[INDEX3(i,0,2,numComp,3)] = V1;
521     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
522     o[INDEX3(i,2,2,numComp,3)] = V2;
523     o[INDEX3(i,0,3,numComp,3)] = V1;
524     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525     o[INDEX3(i,2,3,numComp,3)] = V3;
526 caltinay 3722 } /* end of component loop i */
527     } /* end of k0 loop */
528     } /* end of k2 loop */
529     } /* end of face 3 */
530     if (m_faceOffset[4] > -1) {
531     #pragma omp for nowait
532 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
533 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
534     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
535     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
536     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
537     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
538     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
539     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
540     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
541     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
542     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
543     for (index_t i=0; i < numComp; ++i) {
544 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
545     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
546     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
547     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
548     o[INDEX3(i,0,0,numComp,3)] = V0;
549     o[INDEX3(i,1,0,numComp,3)] = V2;
550     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
551     o[INDEX3(i,0,1,numComp,3)] = V0;
552     o[INDEX3(i,1,1,numComp,3)] = V3;
553     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
554     o[INDEX3(i,0,2,numComp,3)] = V1;
555     o[INDEX3(i,1,2,numComp,3)] = V2;
556     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
557     o[INDEX3(i,0,3,numComp,3)] = V1;
558     o[INDEX3(i,1,3,numComp,3)] = V3;
559     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 caltinay 3722 } /* end of component loop i */
561     } /* end of k0 loop */
562 caltinay 3707 } /* end of k1 loop */
563 caltinay 3722 } /* end of face 4 */
564     if (m_faceOffset[5] > -1) {
565     #pragma omp for nowait
566 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
567 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
568     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
569     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
570     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
571     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
572     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
573     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
574     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
575     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
576     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
577     for (index_t i=0; i < numComp; ++i) {
578 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
579     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
580     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
581     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
582     o[INDEX3(i,0,0,numComp,3)] = V0;
583     o[INDEX3(i,1,0,numComp,3)] = V2;
584     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
585     o[INDEX3(i,0,1,numComp,3)] = V0;
586     o[INDEX3(i,1,1,numComp,3)] = V3;
587     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
588     o[INDEX3(i,0,2,numComp,3)] = V1;
589     o[INDEX3(i,1,2,numComp,3)] = V2;
590     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
591     o[INDEX3(i,0,3,numComp,3)] = V1;
592     o[INDEX3(i,1,3,numComp,3)] = V3;
593     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 caltinay 3722 } /* end of component loop i */
595     } /* end of k0 loop */
596 caltinay 3707 } /* end of k1 loop */
597 caltinay 3722 } /* end of face 5 */
598     } // end of parallel section
599 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
600 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
601 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
602 caltinay 3722 #pragma omp parallel
603     {
604     if (m_faceOffset[0] > -1) {
605     #pragma omp for nowait
606     for (index_t k2=0; k2 < m_NE2; ++k2) {
607     for (index_t k1=0; k1 < m_NE1; ++k1) {
608     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
609     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
610     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
611     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
612     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
613     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
614     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
615     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
616     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
617     for (index_t i=0; i < numComp; ++i) {
618 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
619     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
620     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
621 caltinay 3722 } /* end of component loop i */
622     } /* end of k1 loop */
623     } /* end of k2 loop */
624     } /* end of face 0 */
625     if (m_faceOffset[1] > -1) {
626     #pragma omp for nowait
627     for (index_t k2=0; k2 < m_NE2; ++k2) {
628     for (index_t k1=0; k1 < m_NE1; ++k1) {
629     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
630     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
631     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
632     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
633     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
634     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
635     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
636     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
637     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
638     for (index_t i=0; i < numComp; ++i) {
639 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
640     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
641     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
642 caltinay 3722 } /* end of component loop i */
643     } /* end of k1 loop */
644     } /* end of k2 loop */
645     } /* end of face 1 */
646     if (m_faceOffset[2] > -1) {
647     #pragma omp for nowait
648     for (index_t k2=0; k2 < m_NE2; ++k2) {
649     for (index_t k0=0; k0 < m_NE0; ++k0) {
650     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
651     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
652     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
653     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
654     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
655     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
656     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
657     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
658     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
659     for (index_t i=0; i < numComp; ++i) {
660 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
661     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
662     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
663 caltinay 3722 } /* end of component loop i */
664     } /* end of k0 loop */
665     } /* end of k2 loop */
666     } /* end of face 2 */
667     if (m_faceOffset[3] > -1) {
668     #pragma omp for nowait
669     for (index_t k2=0; k2 < m_NE2; ++k2) {
670     for (index_t k0=0; k0 < m_NE0; ++k0) {
671     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
672     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
673     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
674     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
675     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
676     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
677     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
678     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
679     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
680     for (index_t i=0; i < numComp; ++i) {
681 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
682     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
683     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
684 caltinay 3722 } /* end of component loop i */
685     } /* end of k0 loop */
686     } /* end of k2 loop */
687     } /* end of face 3 */
688     if (m_faceOffset[4] > -1) {
689     #pragma omp for nowait
690 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
691 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
692     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
693     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
694     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
695     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
696     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
697     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
698     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
699     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
700     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
701     for (index_t i=0; i < numComp; ++i) {
702 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
703     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
704     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
705 caltinay 3722 } /* end of component loop i */
706     } /* end of k0 loop */
707 caltinay 3711 } /* end of k1 loop */
708 caltinay 3722 } /* end of face 4 */
709     if (m_faceOffset[5] > -1) {
710     #pragma omp for nowait
711 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
712 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
713     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
714     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
715     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
716     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
717     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
718     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
719     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
720     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
721     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
722     for (index_t i=0; i < numComp; ++i) {
723 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
724     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
725     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
726 caltinay 3722 } /* end of component loop i */
727     } /* end of k0 loop */
728 caltinay 3711 } /* end of k1 loop */
729 caltinay 3722 } /* end of face 5 */
730     } // end of parallel section
731 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
732 caltinay 3703 } else {
733 caltinay 3707 stringstream msg;
734     msg << "setToGradient() not implemented for "
735     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
736     throw RipleyException(msg.str());
737 caltinay 3703 }
738     }
739    
740 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
741     {
742     escript::Data& in = *const_cast<escript::Data*>(&arg);
743     const dim_t numComp = in.getDataPointSize();
744     const double h0 = m_l0/m_gNE0;
745     const double h1 = m_l1/m_gNE1;
746     const double h2 = m_l2/m_gNE2;
747     if (arg.getFunctionSpace().getTypeCode() == Elements) {
748     const double w_0 = h0*h1*h2/8.;
749     #pragma omp parallel
750     {
751     vector<double> int_local(numComp, 0);
752 caltinay 3722 #pragma omp for nowait
753 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
754     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
755     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
756     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
757     for (index_t i=0; i < numComp; ++i) {
758     const register double f_0 = f[INDEX2(i,0,numComp)];
759     const register double f_1 = f[INDEX2(i,1,numComp)];
760     const register double f_2 = f[INDEX2(i,2,numComp)];
761     const register double f_3 = f[INDEX2(i,3,numComp)];
762     const register double f_4 = f[INDEX2(i,4,numComp)];
763     const register double f_5 = f[INDEX2(i,5,numComp)];
764     const register double f_6 = f[INDEX2(i,6,numComp)];
765     const register double f_7 = f[INDEX2(i,7,numComp)];
766     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
767     } /* end of component loop i */
768     } /* end of k0 loop */
769     } /* end of k1 loop */
770     } /* end of k2 loop */
771    
772     #pragma omp critical
773     for (index_t i=0; i<numComp; i++)
774     integrals[i]+=int_local[i];
775 caltinay 3722 } // end of parallel section
776 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
777     const double w_0 = h0*h1*h2;
778     #pragma omp parallel
779     {
780     vector<double> int_local(numComp, 0);
781 caltinay 3722 #pragma omp for nowait
782 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
783     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
784     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
785     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
786     for (index_t i=0; i < numComp; ++i) {
787     int_local[i]+=f[i]*w_0;
788     } /* end of component loop i */
789     } /* end of k0 loop */
790     } /* end of k1 loop */
791     } /* end of k2 loop */
792    
793     #pragma omp critical
794     for (index_t i=0; i<numComp; i++)
795     integrals[i]+=int_local[i];
796 caltinay 3722 } // end of parallel section
797 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
798     const double w_0 = h1*h2/4.;
799     const double w_1 = h0*h2/4.;
800     const double w_2 = h0*h1/4.;
801     #pragma omp parallel
802     {
803     vector<double> int_local(numComp, 0);
804     if (m_faceOffset[0] > -1) {
805 caltinay 3722 #pragma omp for nowait
806 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
807     for (index_t k1=0; k1 < m_NE1; ++k1) {
808     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
809     for (index_t i=0; i < numComp; ++i) {
810     const register double f_0 = f[INDEX2(i,0,numComp)];
811     const register double f_1 = f[INDEX2(i,1,numComp)];
812     const register double f_2 = f[INDEX2(i,2,numComp)];
813     const register double f_3 = f[INDEX2(i,3,numComp)];
814     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
815     } /* end of component loop i */
816     } /* end of k1 loop */
817     } /* end of k2 loop */
818     }
819    
820     if (m_faceOffset[1] > -1) {
821 caltinay 3722 #pragma omp for nowait
822 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
823     for (index_t k1=0; k1 < m_NE1; ++k1) {
824     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
825     for (index_t i=0; i < numComp; ++i) {
826     const register double f_0 = f[INDEX2(i,0,numComp)];
827     const register double f_1 = f[INDEX2(i,1,numComp)];
828     const register double f_2 = f[INDEX2(i,2,numComp)];
829     const register double f_3 = f[INDEX2(i,3,numComp)];
830     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
831     } /* end of component loop i */
832     } /* end of k1 loop */
833     } /* end of k2 loop */
834     }
835    
836     if (m_faceOffset[2] > -1) {
837 caltinay 3722 #pragma omp for nowait
838 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
839     for (index_t k0=0; k0 < m_NE0; ++k0) {
840     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
841     for (index_t i=0; i < numComp; ++i) {
842     const register double f_0 = f[INDEX2(i,0,numComp)];
843     const register double f_1 = f[INDEX2(i,1,numComp)];
844     const register double f_2 = f[INDEX2(i,2,numComp)];
845     const register double f_3 = f[INDEX2(i,3,numComp)];
846     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
847     } /* end of component loop i */
848     } /* end of k1 loop */
849     } /* end of k2 loop */
850     }
851    
852     if (m_faceOffset[3] > -1) {
853 caltinay 3722 #pragma omp for nowait
854 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
855     for (index_t k0=0; k0 < m_NE0; ++k0) {
856     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
857     for (index_t i=0; i < numComp; ++i) {
858     const register double f_0 = f[INDEX2(i,0,numComp)];
859     const register double f_1 = f[INDEX2(i,1,numComp)];
860     const register double f_2 = f[INDEX2(i,2,numComp)];
861     const register double f_3 = f[INDEX2(i,3,numComp)];
862     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
863     } /* end of component loop i */
864     } /* end of k1 loop */
865     } /* end of k2 loop */
866     }
867    
868     if (m_faceOffset[4] > -1) {
869 caltinay 3722 #pragma omp for nowait
870 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
871     for (index_t k0=0; k0 < m_NE0; ++k0) {
872     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
873     for (index_t i=0; i < numComp; ++i) {
874     const register double f_0 = f[INDEX2(i,0,numComp)];
875     const register double f_1 = f[INDEX2(i,1,numComp)];
876     const register double f_2 = f[INDEX2(i,2,numComp)];
877     const register double f_3 = f[INDEX2(i,3,numComp)];
878     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
879     } /* end of component loop i */
880     } /* end of k1 loop */
881     } /* end of k2 loop */
882     }
883    
884     if (m_faceOffset[5] > -1) {
885 caltinay 3722 #pragma omp for nowait
886 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
887     for (index_t k0=0; k0 < m_NE0; ++k0) {
888     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
889     for (index_t i=0; i < numComp; ++i) {
890     const register double f_0 = f[INDEX2(i,0,numComp)];
891     const register double f_1 = f[INDEX2(i,1,numComp)];
892     const register double f_2 = f[INDEX2(i,2,numComp)];
893     const register double f_3 = f[INDEX2(i,3,numComp)];
894     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
895     } /* end of component loop i */
896     } /* end of k1 loop */
897     } /* end of k2 loop */
898     }
899    
900     #pragma omp critical
901     for (index_t i=0; i<numComp; i++)
902     integrals[i]+=int_local[i];
903 caltinay 3722 } // end of parallel section
904 caltinay 3713
905     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
906     const double w_0 = h1*h2;
907     const double w_1 = h0*h2;
908     const double w_2 = h0*h1;
909     #pragma omp parallel
910     {
911     vector<double> int_local(numComp, 0);
912     if (m_faceOffset[0] > -1) {
913 caltinay 3722 #pragma omp for nowait
914 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
915     for (index_t k1=0; k1 < m_NE1; ++k1) {
916     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
917     for (index_t i=0; i < numComp; ++i) {
918     int_local[i]+=f[i]*w_0;
919     } /* end of component loop i */
920     } /* end of k1 loop */
921     } /* end of k2 loop */
922     }
923    
924     if (m_faceOffset[1] > -1) {
925 caltinay 3722 #pragma omp for nowait
926 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
927     for (index_t k1=0; k1 < m_NE1; ++k1) {
928     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
929     for (index_t i=0; i < numComp; ++i) {
930     int_local[i]+=f[i]*w_0;
931     } /* end of component loop i */
932     } /* end of k1 loop */
933     } /* end of k2 loop */
934     }
935    
936     if (m_faceOffset[2] > -1) {
937 caltinay 3722 #pragma omp for nowait
938 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
939     for (index_t k0=0; k0 < m_NE0; ++k0) {
940     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
941     for (index_t i=0; i < numComp; ++i) {
942     int_local[i]+=f[i]*w_1;
943     } /* end of component loop i */
944     } /* end of k1 loop */
945     } /* end of k2 loop */
946     }
947    
948     if (m_faceOffset[3] > -1) {
949 caltinay 3722 #pragma omp for nowait
950 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
951     for (index_t k0=0; k0 < m_NE0; ++k0) {
952     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
953     for (index_t i=0; i < numComp; ++i) {
954     int_local[i]+=f[i]*w_1;
955     } /* end of component loop i */
956     } /* end of k1 loop */
957     } /* end of k2 loop */
958     }
959    
960     if (m_faceOffset[4] > -1) {
961 caltinay 3722 #pragma omp for nowait
962 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
963     for (index_t k0=0; k0 < m_NE0; ++k0) {
964     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
965     for (index_t i=0; i < numComp; ++i) {
966     int_local[i]+=f[i]*w_2;
967     } /* end of component loop i */
968     } /* end of k1 loop */
969     } /* end of k2 loop */
970     }
971    
972     if (m_faceOffset[5] > -1) {
973 caltinay 3722 #pragma omp for nowait
974 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
975     for (index_t k0=0; k0 < m_NE0; ++k0) {
976     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
977     for (index_t i=0; i < numComp; ++i) {
978     int_local[i]+=f[i]*w_2;
979     } /* end of component loop i */
980     } /* end of k1 loop */
981     } /* end of k2 loop */
982     }
983    
984     #pragma omp critical
985     for (index_t i=0; i<numComp; i++)
986     integrals[i]+=int_local[i];
987 caltinay 3722 } // end of parallel section
988 caltinay 3713
989     } else {
990     stringstream msg;
991     msg << "setToIntegrals() not implemented for "
992     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
993     throw RipleyException(msg.str());
994     }
995     }
996    
997 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
998     {
999     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1000     #pragma omp parallel
1001     {
1002     if (m_faceOffset[0] > -1) {
1003     #pragma omp for nowait
1004     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1005     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1006     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1007     // set vector at four quadrature points
1008     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1009     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1010     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1011     *o++ = -1.; *o++ = 0.; *o = 0.;
1012     }
1013     }
1014     }
1015    
1016     if (m_faceOffset[1] > -1) {
1017     #pragma omp for nowait
1018     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1019     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1020     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1021     // set vector at four quadrature points
1022     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1023     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1024     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1025     *o++ = 1.; *o++ = 0.; *o = 0.;
1026     }
1027     }
1028     }
1029    
1030     if (m_faceOffset[2] > -1) {
1031     #pragma omp for nowait
1032     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1033     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1034     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1035     // set vector at four quadrature points
1036     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1037     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1038     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1039     *o++ = 0.; *o++ = -1.; *o = 0.;
1040     }
1041     }
1042     }
1043    
1044     if (m_faceOffset[3] > -1) {
1045     #pragma omp for nowait
1046     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1047     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1048     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1049     // set vector at four quadrature points
1050     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1051     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1052     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1053     *o++ = 0.; *o++ = 1.; *o = 0.;
1054     }
1055     }
1056     }
1057    
1058     if (m_faceOffset[4] > -1) {
1059     #pragma omp for nowait
1060     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1061     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1062     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1063     // set vector at four quadrature points
1064     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1065     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1066     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1067     *o++ = 0.; *o++ = 0.; *o = -1.;
1068     }
1069     }
1070     }
1071    
1072     if (m_faceOffset[5] > -1) {
1073     #pragma omp for nowait
1074     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1075     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1076     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1077     // set vector at four quadrature points
1078     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1079     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1080     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1081     *o++ = 0.; *o++ = 0.; *o = 1.;
1082     }
1083     }
1084     }
1085     } // end of parallel section
1086     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1087     #pragma omp parallel
1088     {
1089     if (m_faceOffset[0] > -1) {
1090     #pragma omp for nowait
1091     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1092     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1093     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1094     *o++ = -1.;
1095     *o++ = 0.;
1096     *o = 0.;
1097     }
1098     }
1099     }
1100    
1101     if (m_faceOffset[1] > -1) {
1102     #pragma omp for nowait
1103     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1104     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1105     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1106     *o++ = 1.;
1107     *o++ = 0.;
1108     *o = 0.;
1109     }
1110     }
1111     }
1112    
1113     if (m_faceOffset[2] > -1) {
1114     #pragma omp for nowait
1115     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1116     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1117     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1118     *o++ = 0.;
1119     *o++ = -1.;
1120     *o = 0.;
1121     }
1122     }
1123     }
1124    
1125     if (m_faceOffset[3] > -1) {
1126     #pragma omp for nowait
1127     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1128     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1129     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1130     *o++ = 0.;
1131     *o++ = 1.;
1132     *o = 0.;
1133     }
1134     }
1135     }
1136    
1137     if (m_faceOffset[4] > -1) {
1138     #pragma omp for nowait
1139     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1140     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1141     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1142     *o++ = 0.;
1143     *o++ = 0.;
1144     *o = -1.;
1145     }
1146     }
1147     }
1148    
1149     if (m_faceOffset[5] > -1) {
1150     #pragma omp for nowait
1151     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1152     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1153     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1154     *o++ = 0.;
1155     *o++ = 0.;
1156     *o = 1.;
1157     }
1158     }
1159     }
1160     } // end of parallel section
1161    
1162     } else {
1163     stringstream msg;
1164     msg << "setToNormal() not implemented for "
1165     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1166     throw RipleyException(msg.str());
1167     }
1168     }
1169    
1170 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1171     bool reducedColOrder) const
1172     {
1173     if (reducedRowOrder || reducedColOrder)
1174     throw RipleyException("getPattern() not implemented for reduced order");
1175    
1176 caltinay 3756 return m_pattern;
1177     }
1178    
1179     void Brick::Print_Mesh_Info(const bool full) const
1180     {
1181     RipleyDomain::Print_Mesh_Info(full);
1182     if (full) {
1183     cout << " Id Coordinates" << endl;
1184     cout.precision(15);
1185     cout.setf(ios::scientific, ios::floatfield);
1186     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1187     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1188     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1189     for (index_t i=0; i < getNumNodes(); i++) {
1190     cout << " " << setw(5) << m_nodeId[i]
1191     << " " << xdx.first+(i%m_N0)*xdx.second
1192     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1193     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1194     }
1195     }
1196     }
1197    
1198     IndexVector Brick::getNumNodesPerDim() const
1199     {
1200     IndexVector ret;
1201     ret.push_back(m_N0);
1202     ret.push_back(m_N1);
1203     ret.push_back(m_N2);
1204     return ret;
1205     }
1206    
1207     IndexVector Brick::getNumElementsPerDim() const
1208     {
1209     IndexVector ret;
1210     ret.push_back(m_NE0);
1211     ret.push_back(m_NE1);
1212     ret.push_back(m_NE2);
1213     return ret;
1214     }
1215    
1216     IndexVector Brick::getNumFacesPerBoundary() const
1217     {
1218     IndexVector ret(6, 0);
1219     //left
1220     if (m_offset0==0)
1221     ret[0]=m_NE1*m_NE2;
1222     //right
1223     if (m_mpiInfo->rank%m_NX==m_NX-1)
1224     ret[1]=m_NE1*m_NE2;
1225     //bottom
1226     if (m_offset1==0)
1227     ret[2]=m_NE0*m_NE2;
1228     //top
1229     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1230     ret[3]=m_NE0*m_NE2;
1231     //front
1232     if (m_offset2==0)
1233     ret[4]=m_NE0*m_NE1;
1234     //back
1235     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1236     ret[5]=m_NE0*m_NE1;
1237     return ret;
1238     }
1239    
1240     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1241     {
1242     if (dim==0)
1243     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1244     else if (dim==1)
1245     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1246     else if (dim==2)
1247     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1248    
1249     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1250     }
1251    
1252     //protected
1253     dim_t Brick::getNumDOF() const
1254     {
1255     return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1256     }
1257    
1258     //protected
1259     dim_t Brick::getNumFaceElements() const
1260     {
1261     const IndexVector faces = getNumFacesPerBoundary();
1262     dim_t n=0;
1263     for (size_t i=0; i<faces.size(); i++)
1264     n+=faces[i];
1265     return n;
1266     }
1267    
1268     //protected
1269     void Brick::assembleCoordinates(escript::Data& arg) const
1270     {
1271     escriptDataC x = arg.getDataC();
1272     int numDim = m_numDim;
1273     if (!isDataPointShapeEqual(&x, 1, &numDim))
1274     throw RipleyException("setToX: Invalid Data object shape");
1275     if (!numSamplesEqual(&x, 1, getNumNodes()))
1276     throw RipleyException("setToX: Illegal number of samples in Data object");
1277    
1278     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1279     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1280     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1281     arg.requireWrite();
1282     #pragma omp parallel for
1283     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1284     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1285     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1286     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1287     point[0] = xdx.first+i0*xdx.second;
1288     point[1] = ydy.first+i1*ydy.second;
1289     point[2] = zdz.first+i2*zdz.second;
1290     }
1291     }
1292     }
1293     }
1294    
1295     //protected
1296     dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1297     {
1298     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1299     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1300     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1301     const int x=node%nDOF0;
1302     const int y=node%(nDOF0*nDOF1)/nDOF0;
1303     const int z=node/(nDOF0*nDOF1);
1304     int num=0;
1305     // loop through potential neighbours and add to index if positions are
1306     // within bounds
1307     for (int i2=-1; i2<2; i2++) {
1308     for (int i1=-1; i1<2; i1++) {
1309     for (int i0=-1; i0<2; i0++) {
1310     // skip node itself
1311     if (i0==0 && i1==0 && i2==0)
1312     continue;
1313     // location of neighbour node
1314     const int nx=x+i0;
1315     const int ny=y+i1;
1316     const int nz=z+i2;
1317     if (nx>=0 && ny>=0 && nz>=0
1318     && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1319     index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1320     num++;
1321     }
1322     }
1323     }
1324     }
1325    
1326     return num;
1327     }
1328    
1329     //protected
1330     void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1331     {
1332     const dim_t numComp = in.getDataPointSize();
1333     out.requireWrite();
1334    
1335 caltinay 3754 const index_t left = (m_offset0==0 ? 0 : 1);
1336     const index_t bottom = (m_offset1==0 ? 0 : 1);
1337     const index_t front = (m_offset2==0 ? 0 : 1);
1338 caltinay 3756 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1339     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1340     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1341     #pragma omp parallel for
1342     for (index_t i=0; i<nDOF2; i++) {
1343     for (index_t j=0; j<nDOF1; j++) {
1344     for (index_t k=0; k<nDOF0; k++) {
1345     const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1346     const double* src=in.getSampleDataRO(n);
1347     copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1348     }
1349     }
1350     }
1351     }
1352 caltinay 3754
1353 caltinay 3756 //protected
1354     void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1355     {
1356     const dim_t numComp = in.getDataPointSize();
1357     Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1358     in.requireWrite();
1359     Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1360    
1361     const dim_t numDOF = getNumDOF();
1362     out.requireWrite();
1363     const double* buffer = Paso_Coupler_finishCollect(coupler);
1364    
1365     #pragma omp parallel for
1366     for (index_t i=0; i<getNumNodes(); i++) {
1367     const double* src=(m_dofMap[i]<numDOF ?
1368     in.getSampleDataRO(m_dofMap[i])
1369     : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1370     copy(src, src+numComp, out.getSampleDataRW(i));
1371     }
1372     }
1373    
1374     //private
1375     void Brick::populateSampleIds()
1376     {
1377     // identifiers are ordered from left to right, bottom to top, front to back
1378     // globally
1379    
1380     // build node distribution vector first.
1381     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1382     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1383     const dim_t numDOF=getNumDOF();
1384     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1385     m_nodeDistribution[k]=k*numDOF;
1386     }
1387     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1388     m_nodeId.resize(getNumNodes());
1389     m_dofId.resize(numDOF);
1390     m_elementId.resize(getNumElements());
1391     m_faceId.resize(getNumFaceElements());
1392    
1393     #pragma omp parallel
1394     {
1395     #pragma omp for nowait
1396     // nodes
1397     for (dim_t i2=0; i2<m_N2; i2++) {
1398     for (dim_t i1=0; i1<m_N1; i1++) {
1399     for (dim_t i0=0; i0<m_N0; i0++) {
1400     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1401     (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1402     +(m_offset1+i1)*(m_gNE0+1)
1403     +m_offset0+i0;
1404     }
1405     }
1406     }
1407    
1408     // degrees of freedom
1409     #pragma omp for nowait
1410     for (dim_t k=0; k<numDOF; k++)
1411     m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1412    
1413     // elements
1414     #pragma omp for nowait
1415     for (dim_t i2=0; i2<m_NE2; i2++) {
1416     for (dim_t i1=0; i1<m_NE1; i1++) {
1417     for (dim_t i0=0; i0<m_NE0; i0++) {
1418     m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1419     (m_offset2+i2)*m_gNE0*m_gNE1
1420     +(m_offset1+i1)*m_gNE0
1421     +m_offset0+i0;
1422     }
1423     }
1424     }
1425    
1426     // face elements
1427     #pragma omp for
1428     for (dim_t k=0; k<getNumFaceElements(); k++)
1429     m_faceId[k]=k;
1430     } // end parallel section
1431    
1432     m_nodeTags.assign(getNumNodes(), 0);
1433     updateTagsInUse(Nodes);
1434    
1435     m_elementTags.assign(getNumElements(), 0);
1436     updateTagsInUse(Elements);
1437    
1438     // generate face offset vector and set face tags
1439     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1440     const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1441     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1442     m_faceOffset.assign(facesPerEdge.size(), -1);
1443     m_faceTags.clear();
1444     index_t offset=0;
1445     for (size_t i=0; i<facesPerEdge.size(); i++) {
1446     if (facesPerEdge[i]>0) {
1447     m_faceOffset[i]=offset;
1448     offset+=facesPerEdge[i];
1449     m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1450     }
1451     }
1452     setTagMap("left", LEFT);
1453     setTagMap("right", RIGHT);
1454     setTagMap("bottom", BOTTOM);
1455     setTagMap("top", TOP);
1456     setTagMap("front", FRONT);
1457     setTagMap("back", BACK);
1458     updateTagsInUse(FaceElements);
1459     }
1460    
1461     //private
1462     void Brick::createPattern()
1463     {
1464     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1465     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1466     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1467     const index_t left = (m_offset0==0 ? 0 : 1);
1468     const index_t bottom = (m_offset1==0 ? 0 : 1);
1469     const index_t front = (m_offset2==0 ? 0 : 1);
1470    
1471     // populate node->DOF mapping with own degrees of freedom.
1472     // The rest is assigned in the loop further down
1473 caltinay 3754 m_dofMap.assign(getNumNodes(), 0);
1474     #pragma omp parallel for
1475     for (index_t i=front; i<m_N2; i++) {
1476     for (index_t j=bottom; j<m_N1; j++) {
1477     for (index_t k=left; k<m_N0; k++) {
1478     m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1479     }
1480     }
1481     }
1482    
1483     // build list of shared components and neighbours by looping through
1484     // all potential neighbouring ranks and checking if positions are
1485     // within bounds
1486 caltinay 3756 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1487     vector<IndexVector> colIndices(numDOF); // for the couple blocks
1488     RankVector neighbour;
1489     IndexVector offsetInShared(1,0);
1490     IndexVector sendShared, recvShared;
1491     int numShared=0;
1492 caltinay 3754 const int x=m_mpiInfo->rank%m_NX;
1493     const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1494     const int z=m_mpiInfo->rank/(m_NX*m_NY);
1495     for (int i2=-1; i2<2; i2++) {
1496     for (int i1=-1; i1<2; i1++) {
1497     for (int i0=-1; i0<2; i0++) {
1498 caltinay 3756 // skip this rank
1499 caltinay 3754 if (i0==0 && i1==0 && i2==0)
1500     continue;
1501     // location of neighbour rank
1502     const int nx=x+i0;
1503     const int ny=y+i1;
1504     const int nz=z+i2;
1505     if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1506     neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1507     if (i0==0 && i1==0) {
1508     // sharing front or back plane
1509     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1510     for (dim_t i=0; i<nDOF1; i++) {
1511 caltinay 3755 const int firstDOF=(i2==-1 ? i*nDOF0
1512     : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1513     const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1514     : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1515 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1516     sendShared.push_back(firstDOF+j);
1517     recvShared.push_back(numDOF+numShared);
1518     if (j>0) {
1519     if (i>0)
1520     colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1521     colIndices[firstDOF+j-1].push_back(numShared);
1522     if (i<nDOF1-1)
1523     colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1524     }
1525     if (i>0)
1526     colIndices[firstDOF+j-nDOF0].push_back(numShared);
1527     colIndices[firstDOF+j].push_back(numShared);
1528     if (i<nDOF1-1)
1529     colIndices[firstDOF+j+nDOF0].push_back(numShared);
1530     if (j<nDOF0-1) {
1531     if (i>0)
1532     colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1533     colIndices[firstDOF+j+1].push_back(numShared);
1534     if (i<nDOF1-1)
1535     colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1536     }
1537     m_dofMap[firstNode+j]=numDOF+numShared;
1538     }
1539     }
1540     } else if (i0==0 && i2==0) {
1541     // sharing top or bottom plane
1542     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1543     for (dim_t i=0; i<nDOF2; i++) {
1544 caltinay 3755 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1545     : nDOF0*((i+1)*nDOF1-1));
1546     const int firstNode=(i1==-1 ?
1547     left+(i+front)*m_N0*m_N1
1548     : left+m_N0*((i+1+front)*m_N1-1));
1549 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1550     sendShared.push_back(firstDOF+j);
1551     recvShared.push_back(numDOF+numShared);
1552     if (j>0) {
1553     if (i>0)
1554     colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1555     colIndices[firstDOF+j-1].push_back(numShared);
1556     if (i<nDOF2-1)
1557     colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1558     }
1559     if (i>0)
1560     colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1561     colIndices[firstDOF+j].push_back(numShared);
1562     if (i<nDOF2-1)
1563     colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1564     if (j<nDOF0-1) {
1565     if (i>0)
1566     colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1567     colIndices[firstDOF+j+1].push_back(numShared);
1568     if (i<nDOF2-1)
1569     colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1570     }
1571     m_dofMap[firstNode+j]=numDOF+numShared;
1572     }
1573     }
1574     } else if (i1==0 && i2==0) {
1575     // sharing left or right plane
1576     offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1577     for (dim_t i=0; i<nDOF2; i++) {
1578 caltinay 3755 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1579     : nDOF0*(1+i*nDOF1)-1);
1580     const int firstNode=(i0==-1 ?
1581     (bottom+(i+front)*m_N1)*m_N0
1582     : (bottom+1+(i+front)*m_N1)*m_N0-1);
1583 caltinay 3754 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1584     sendShared.push_back(firstDOF+j*nDOF0);
1585     recvShared.push_back(numDOF+numShared);
1586     if (j>0) {
1587     if (i>0)
1588     colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1589     colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1590     if (i<nDOF2-1)
1591     colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1592     }
1593     if (i>0)
1594     colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1595     colIndices[firstDOF+j*nDOF0].push_back(numShared);
1596     if (i<nDOF2-1)
1597     colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1598     if (j<nDOF1-1) {
1599     if (i<