/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3759 - (hide annotations)
Fri Jan 6 06:54:51 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 379221 byte(s)
Implemented ownSample for face elements.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16 caltinay 3754 #include "paso/SystemMatrix.h"
17 caltinay 3691 }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50 caltinay 3753 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51     throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52 caltinay 3691
53 caltinay 3753 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54     throw RipleyException("Too few elements for the number of ranks");
55    
56     // local number of elements (including overlap)
57     m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58     if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59     m_NE0++;
60     m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62     m_NE1++;
63     m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64     if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65     m_NE2++;
66    
67     // local number of nodes
68 caltinay 3691 m_N0 = m_NE0+1;
69     m_N1 = m_NE1+1;
70     m_N2 = m_NE2+1;
71 caltinay 3753
72 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 caltinay 3753 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74     if (m_offset0 > 0)
75     m_offset0--;
76     m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77     if (m_offset1 > 0)
78     m_offset1--;
79     m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80     if (m_offset2 > 0)
81     m_offset2--;
82    
83 caltinay 3691 populateSampleIds();
84 caltinay 3756 createPattern();
85 caltinay 3691 }
86    
87    
88     Brick::~Brick()
89     {
90     }
91    
92     string Brick::getDescription() const
93     {
94     return "ripley::Brick";
95     }
96    
97     bool Brick::operator==(const AbstractDomain& other) const
98     {
99 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
100     if (o) {
101     return (RipleyDomain::operator==(other) &&
102     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103     && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104     && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105     }
106 caltinay 3691
107     return false;
108     }
109    
110     void Brick::dump(const string& fileName) const
111     {
112     #if USE_SILO
113     string fn(fileName);
114     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115     fn+=".silo";
116     }
117    
118     const int NUM_SILO_FILES = 1;
119     const char* blockDirFmt = "/block%04d";
120     int driver=DB_HDF5;
121     string siloPath;
122     DBfile* dbfile = NULL;
123    
124     #ifdef ESYS_MPI
125     PMPIO_baton_t* baton = NULL;
126     #endif
127    
128     if (m_mpiInfo->size > 1) {
129     #ifdef ESYS_MPI
130     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132     PMPIO_DefaultClose, (void*)&driver);
133     // try the fallback driver in case of error
134     if (!baton && driver != DB_PDB) {
135     driver = DB_PDB;
136     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138     PMPIO_DefaultClose, (void*)&driver);
139     }
140     if (baton) {
141     char str[64];
142     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143     siloPath = str;
144     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145     }
146     #endif
147     } else {
148     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149     getDescription().c_str(), driver);
150     // try the fallback driver in case of error
151     if (!dbfile && driver != DB_PDB) {
152     driver = DB_PDB;
153     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154     getDescription().c_str(), driver);
155     }
156     }
157    
158     if (!dbfile)
159     throw RipleyException("dump: Could not create Silo file");
160    
161     /*
162     if (driver==DB_HDF5) {
163     // gzip level 1 already provides good compression with minimal
164     // performance penalty. Some tests showed that gzip levels >3 performed
165     // rather badly on escript data both in terms of time and space
166     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167     }
168     */
169    
170     boost::scoped_ptr<double> x(new double[m_N0]);
171     boost::scoped_ptr<double> y(new double[m_N1]);
172     boost::scoped_ptr<double> z(new double[m_N2]);
173     double* coords[3] = { x.get(), y.get(), z.get() };
174 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175     pair<double,double> ydy = getFirstCoordAndSpacing(1);
176     pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 caltinay 3691 #pragma omp parallel
178     {
179     #pragma omp for
180     for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
182 caltinay 3691 }
183     #pragma omp for
184     for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
186 caltinay 3691 }
187     #pragma omp for
188     for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
190 caltinay 3691 }
191     }
192 caltinay 3698 IndexVector dims = getNumNodesPerDim();
193     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 caltinay 3691 DB_COLLINEAR, NULL);
195    
196 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197     NULL, 0, DB_INT, DB_NODECENT, NULL);
198 caltinay 3691
199 caltinay 3698 // write element ids
200     dims = getNumElementsPerDim();
201     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203    
204     // rank 0 writes multimesh and multivar
205 caltinay 3691 if (m_mpiInfo->rank == 0) {
206     vector<string> tempstrings;
207     vector<char*> names;
208     for (dim_t i=0; i<m_mpiInfo->size; i++) {
209     stringstream path;
210     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211     tempstrings.push_back(path.str());
212     names.push_back((char*)tempstrings.back().c_str());
213     }
214     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215     DBSetDir(dbfile, "/");
216     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217     &types[0], NULL);
218     tempstrings.clear();
219     names.clear();
220     for (dim_t i=0; i<m_mpiInfo->size; i++) {
221     stringstream path;
222     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223     tempstrings.push_back(path.str());
224     names.push_back((char*)tempstrings.back().c_str());
225     }
226     types.assign(m_mpiInfo->size, DB_QUADVAR);
227     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228     &types[0], NULL);
229 caltinay 3698 tempstrings.clear();
230     names.clear();
231     for (dim_t i=0; i<m_mpiInfo->size; i++) {
232     stringstream path;
233     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234     tempstrings.push_back(path.str());
235     names.push_back((char*)tempstrings.back().c_str());
236     }
237     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238     &types[0], NULL);
239 caltinay 3691 }
240    
241     if (m_mpiInfo->size > 1) {
242     #ifdef ESYS_MPI
243     PMPIO_HandOffBaton(baton, dbfile);
244     PMPIO_Finish(baton);
245     #endif
246     } else {
247     DBClose(dbfile);
248     }
249    
250     #else // USE_SILO
251     throw RipleyException("dump(): no Silo support");
252     #endif
253     }
254    
255     const int* Brick::borrowSampleReferenceIDs(int fsType) const
256     {
257 caltinay 3697 switch (fsType) {
258     case Nodes:
259 caltinay 3748 case ReducedNodes: //FIXME: reduced
260 caltinay 3753 return &m_nodeId[0];
261 caltinay 3757 case DegreesOfFreedom:
262     case ReducedDegreesOfFreedom: //FIXME: reduced
263 caltinay 3753 return &m_dofId[0];
264 caltinay 3697 case Elements:
265 caltinay 3733 case ReducedElements:
266 caltinay 3697 return &m_elementId[0];
267 caltinay 3757 case FaceElements:
268 caltinay 3733 case ReducedFaceElements:
269 caltinay 3697 return &m_faceId[0];
270     default:
271     break;
272     }
273 caltinay 3691
274 caltinay 3697 stringstream msg;
275     msg << "borrowSampleReferenceIDs() not implemented for function space type "
276     << fsType;
277     throw RipleyException(msg.str());
278 caltinay 3691 }
279    
280 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
281 caltinay 3691 {
282 caltinay 3759 if (getMPISize()==1)
283     return true;
284    
285 caltinay 3757 switch (fsType) {
286     case Nodes:
287     case ReducedNodes: //FIXME: reduced
288     return (m_dofMap[id] < getNumDOF());
289     case DegreesOfFreedom:
290     case ReducedDegreesOfFreedom:
291     return true;
292     case Elements:
293     case ReducedElements:
294     {
295     // check ownership of element's _last_ node
296     const index_t x=id%m_NE0 + 1;
297     const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
298     const index_t z=id/(m_NE0*m_NE1) + 1;
299     return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
300     }
301     case FaceElements:
302     case ReducedFaceElements:
303 caltinay 3759 {
304     // check ownership of face element's last node
305     const IndexVector faces = getNumFacesPerBoundary();
306     dim_t n=0;
307     for (size_t i=0; i<faces.size(); i++) {
308     n+=faces[i];
309     if (id<n) {
310     const index_t j=id-n+faces[i];
311     if (i>=4) { // front or back
312     const index_t first=(i==4 ? 0 : m_N0*m_N1*(m_N2-1));
313     return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0] < getNumDOF());
314     } else if (i>=2) { // bottom or top
315     const index_t first=(i==2 ? 0 : m_N0*(m_N1-1));
316     return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0*m_N1] < getNumDOF());
317     } else { // left or right
318     const index_t first=(i==0 ? 0 : m_N0-1);
319     return (m_dofMap[first+(j%m_NE1+1)*m_N0+(j/m_NE1+1)*m_N0*m_N1] < getNumDOF());
320     }
321     }
322     }
323     return false;
324     }
325 caltinay 3757 default:
326     break;
327 caltinay 3753 }
328 caltinay 3757
329     stringstream msg;
330     msg << "ownSample() not implemented for "
331     << functionSpaceTypeAsString(fsType);
332     throw RipleyException(msg.str());
333 caltinay 3691 }
334    
335 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
336     {
337     escript::Data& in = *const_cast<escript::Data*>(&cIn);
338     const dim_t numComp = in.getDataPointSize();
339     const double h0 = m_l0/m_gNE0;
340     const double h1 = m_l1/m_gNE1;
341     const double h2 = m_l1/m_gNE2;
342 caltinay 3731 const double C0 = .044658198738520451079;
343     const double C1 = .16666666666666666667;
344     const double C2 = .21132486540518711775;
345     const double C3 = .25;
346     const double C4 = .5;
347     const double C5 = .62200846792814621559;
348     const double C6 = .78867513459481288225;
349    
350 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
351 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
352 caltinay 3703 #pragma omp parallel for
353 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
354     for (index_t k1=0; k1 < m_NE1; ++k1) {
355     for (index_t k0=0; k0 < m_NE0; ++k0) {
356 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
357     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
358     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
359     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
360     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
361     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
362     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
363     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
364     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
365     for (index_t i=0; i < numComp; ++i) {
366 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
367     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
368     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
369     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
370     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
371     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
372     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
373     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
374     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
375     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
376     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
377     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
378     o[INDEX3(i,0,0,numComp,3)] = V0;
379     o[INDEX3(i,1,0,numComp,3)] = V4;
380     o[INDEX3(i,2,0,numComp,3)] = V8;
381     o[INDEX3(i,0,1,numComp,3)] = V0;
382     o[INDEX3(i,1,1,numComp,3)] = V5;
383     o[INDEX3(i,2,1,numComp,3)] = V9;
384     o[INDEX3(i,0,2,numComp,3)] = V1;
385     o[INDEX3(i,1,2,numComp,3)] = V4;
386     o[INDEX3(i,2,2,numComp,3)] = V10;
387     o[INDEX3(i,0,3,numComp,3)] = V1;
388     o[INDEX3(i,1,3,numComp,3)] = V5;
389     o[INDEX3(i,2,3,numComp,3)] = V11;
390     o[INDEX3(i,0,4,numComp,3)] = V2;
391     o[INDEX3(i,1,4,numComp,3)] = V6;
392     o[INDEX3(i,2,4,numComp,3)] = V8;
393     o[INDEX3(i,0,5,numComp,3)] = V2;
394     o[INDEX3(i,1,5,numComp,3)] = V7;
395     o[INDEX3(i,2,5,numComp,3)] = V9;
396     o[INDEX3(i,0,6,numComp,3)] = V3;
397     o[INDEX3(i,1,6,numComp,3)] = V6;
398     o[INDEX3(i,2,6,numComp,3)] = V10;
399     o[INDEX3(i,0,7,numComp,3)] = V3;
400     o[INDEX3(i,1,7,numComp,3)] = V7;
401     o[INDEX3(i,2,7,numComp,3)] = V11;
402 caltinay 3703 } /* end of component loop i */
403     } /* end of k0 loop */
404     } /* end of k1 loop */
405     } /* end of k2 loop */
406     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
407 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
408 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
409 caltinay 3711 #pragma omp parallel for
410     for (index_t k2=0; k2 < m_NE2; ++k2) {
411     for (index_t k1=0; k1 < m_NE1; ++k1) {
412     for (index_t k0=0; k0 < m_NE0; ++k0) {
413     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
414     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
415     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
416     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
417     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
418     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
419     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
420     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
421     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
422     for (index_t i=0; i < numComp; ++i) {
423 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
424     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
425     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
426 caltinay 3711 } /* end of component loop i */
427     } /* end of k0 loop */
428     } /* end of k1 loop */
429     } /* end of k2 loop */
430     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
431 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
432 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
433 caltinay 3722 #pragma omp parallel
434     {
435     if (m_faceOffset[0] > -1) {
436     #pragma omp for nowait
437     for (index_t k2=0; k2 < m_NE2; ++k2) {
438     for (index_t k1=0; k1 < m_NE1; ++k1) {
439     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
440     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
441     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
442     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
443     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
444     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
445     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
446     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
447     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
448     for (index_t i=0; i < numComp; ++i) {
449 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
450     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
451     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
452     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
453     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
454     o[INDEX3(i,1,0,numComp,3)] = V0;
455     o[INDEX3(i,2,0,numComp,3)] = V2;
456     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
457     o[INDEX3(i,1,1,numComp,3)] = V0;
458     o[INDEX3(i,2,1,numComp,3)] = V3;
459     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460     o[INDEX3(i,1,2,numComp,3)] = V1;
461     o[INDEX3(i,2,2,numComp,3)] = V2;
462     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
463     o[INDEX3(i,1,3,numComp,3)] = V1;
464     o[INDEX3(i,2,3,numComp,3)] = V3;
465 caltinay 3722 } /* end of component loop i */
466     } /* end of k1 loop */
467     } /* end of k2 loop */
468     } /* end of face 0 */
469     if (m_faceOffset[1] > -1) {
470     #pragma omp for nowait
471     for (index_t k2=0; k2 < m_NE2; ++k2) {
472     for (index_t k1=0; k1 < m_NE1; ++k1) {
473     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
474     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
475     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
476     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
477     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
478     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
479     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
480     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
481     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
482     for (index_t i=0; i < numComp; ++i) {
483 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
484     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
485     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
486     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
487     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
488     o[INDEX3(i,1,0,numComp,3)] = V0;
489     o[INDEX3(i,2,0,numComp,3)] = V2;
490     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
491     o[INDEX3(i,1,1,numComp,3)] = V0;
492     o[INDEX3(i,2,1,numComp,3)] = V3;
493     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
494     o[INDEX3(i,1,2,numComp,3)] = V1;
495     o[INDEX3(i,2,2,numComp,3)] = V2;
496     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
497     o[INDEX3(i,1,3,numComp,3)] = V1;
498     o[INDEX3(i,2,3,numComp,3)] = V3;
499 caltinay 3722 } /* end of component loop i */
500     } /* end of k1 loop */
501     } /* end of k2 loop */
502     } /* end of face 1 */
503     if (m_faceOffset[2] > -1) {
504     #pragma omp for nowait
505     for (index_t k2=0; k2 < m_NE2; ++k2) {
506     for (index_t k0=0; k0 < m_NE0; ++k0) {
507     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
508     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
509     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
510     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
511     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
512     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
513     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
514     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
515     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
516     for (index_t i=0; i < numComp; ++i) {
517 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
518     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
519     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
520     o[INDEX3(i,0,0,numComp,3)] = V0;
521     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
522     o[INDEX3(i,2,0,numComp,3)] = V1;
523     o[INDEX3(i,0,1,numComp,3)] = V0;
524     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
525     o[INDEX3(i,2,1,numComp,3)] = V2;
526     o[INDEX3(i,0,2,numComp,3)] = V0;
527     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528     o[INDEX3(i,2,2,numComp,3)] = V1;
529     o[INDEX3(i,0,3,numComp,3)] = V0;
530     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
531     o[INDEX3(i,2,3,numComp,3)] = V2;
532 caltinay 3722 } /* end of component loop i */
533     } /* end of k0 loop */
534     } /* end of k2 loop */
535     } /* end of face 2 */
536     if (m_faceOffset[3] > -1) {
537     #pragma omp for nowait
538     for (index_t k2=0; k2 < m_NE2; ++k2) {
539     for (index_t k0=0; k0 < m_NE0; ++k0) {
540     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
541     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
542     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
543     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
544     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
545     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
546     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
547     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
548     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
549     for (index_t i=0; i < numComp; ++i) {
550 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
551     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
552     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
553     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
554     o[INDEX3(i,0,0,numComp,3)] = V0;
555     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
556     o[INDEX3(i,2,0,numComp,3)] = V2;
557     o[INDEX3(i,0,1,numComp,3)] = V0;
558     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
559     o[INDEX3(i,2,1,numComp,3)] = V3;
560     o[INDEX3(i,0,2,numComp,3)] = V1;
561     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
562     o[INDEX3(i,2,2,numComp,3)] = V2;
563     o[INDEX3(i,0,3,numComp,3)] = V1;
564     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
565     o[INDEX3(i,2,3,numComp,3)] = V3;
566 caltinay 3722 } /* end of component loop i */
567     } /* end of k0 loop */
568     } /* end of k2 loop */
569     } /* end of face 3 */
570     if (m_faceOffset[4] > -1) {
571     #pragma omp for nowait
572 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
573 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
574     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
575     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
576     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
577     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
578     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
579     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
580     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
581     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
582     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
583     for (index_t i=0; i < numComp; ++i) {
584 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
585     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
586     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
587     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
588     o[INDEX3(i,0,0,numComp,3)] = V0;
589     o[INDEX3(i,1,0,numComp,3)] = V2;
590     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
591     o[INDEX3(i,0,1,numComp,3)] = V0;
592     o[INDEX3(i,1,1,numComp,3)] = V3;
593     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
594     o[INDEX3(i,0,2,numComp,3)] = V1;
595     o[INDEX3(i,1,2,numComp,3)] = V2;
596     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597     o[INDEX3(i,0,3,numComp,3)] = V1;
598     o[INDEX3(i,1,3,numComp,3)] = V3;
599     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
600 caltinay 3722 } /* end of component loop i */
601     } /* end of k0 loop */
602 caltinay 3707 } /* end of k1 loop */
603 caltinay 3722 } /* end of face 4 */
604     if (m_faceOffset[5] > -1) {
605     #pragma omp for nowait
606 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
607 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
608     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
609     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
610     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
611     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
612     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
613     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
614     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
615     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
616     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
617     for (index_t i=0; i < numComp; ++i) {
618 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
619     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
620     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
621     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
622     o[INDEX3(i,0,0,numComp,3)] = V0;
623     o[INDEX3(i,1,0,numComp,3)] = V2;
624     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
625     o[INDEX3(i,0,1,numComp,3)] = V0;
626     o[INDEX3(i,1,1,numComp,3)] = V3;
627     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
628     o[INDEX3(i,0,2,numComp,3)] = V1;
629     o[INDEX3(i,1,2,numComp,3)] = V2;
630     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
631     o[INDEX3(i,0,3,numComp,3)] = V1;
632     o[INDEX3(i,1,3,numComp,3)] = V3;
633     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
634 caltinay 3722 } /* end of component loop i */
635     } /* end of k0 loop */
636 caltinay 3707 } /* end of k1 loop */
637 caltinay 3722 } /* end of face 5 */
638     } // end of parallel section
639 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
640 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
641 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
642 caltinay 3722 #pragma omp parallel
643     {
644     if (m_faceOffset[0] > -1) {
645     #pragma omp for nowait
646     for (index_t k2=0; k2 < m_NE2; ++k2) {
647     for (index_t k1=0; k1 < m_NE1; ++k1) {
648     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
649     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
650     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
651     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
652     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
653     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
654     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
655     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
656     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
657     for (index_t i=0; i < numComp; ++i) {
658 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
659     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
660     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
661 caltinay 3722 } /* end of component loop i */
662     } /* end of k1 loop */
663     } /* end of k2 loop */
664     } /* end of face 0 */
665     if (m_faceOffset[1] > -1) {
666     #pragma omp for nowait
667     for (index_t k2=0; k2 < m_NE2; ++k2) {
668     for (index_t k1=0; k1 < m_NE1; ++k1) {
669     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
670     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
671     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
672     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
673     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
674     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
675     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
676     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
677     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
678     for (index_t i=0; i < numComp; ++i) {
679 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
680     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
681     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
682 caltinay 3722 } /* end of component loop i */
683     } /* end of k1 loop */
684     } /* end of k2 loop */
685     } /* end of face 1 */
686     if (m_faceOffset[2] > -1) {
687     #pragma omp for nowait
688     for (index_t k2=0; k2 < m_NE2; ++k2) {
689     for (index_t k0=0; k0 < m_NE0; ++k0) {
690     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
691     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
692     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
693     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
694     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
695     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
696     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
697     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
698     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
699     for (index_t i=0; i < numComp; ++i) {
700 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
701     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
702     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
703 caltinay 3722 } /* end of component loop i */
704     } /* end of k0 loop */
705     } /* end of k2 loop */
706     } /* end of face 2 */
707     if (m_faceOffset[3] > -1) {
708     #pragma omp for nowait
709     for (index_t k2=0; k2 < m_NE2; ++k2) {
710     for (index_t k0=0; k0 < m_NE0; ++k0) {
711     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
712     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
713     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
714     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
715     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
716     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
717     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
718     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
719     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
720     for (index_t i=0; i < numComp; ++i) {
721 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
722     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
723     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
724 caltinay 3722 } /* end of component loop i */
725     } /* end of k0 loop */
726     } /* end of k2 loop */
727     } /* end of face 3 */
728     if (m_faceOffset[4] > -1) {
729     #pragma omp for nowait
730 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
731 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
732     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
733     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
734     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
735     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
736     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
737     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
738     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
739     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
740     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
741     for (index_t i=0; i < numComp; ++i) {
742 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
743     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
744     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
745 caltinay 3722 } /* end of component loop i */
746     } /* end of k0 loop */
747 caltinay 3711 } /* end of k1 loop */
748 caltinay 3722 } /* end of face 4 */
749     if (m_faceOffset[5] > -1) {
750     #pragma omp for nowait
751 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
752 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
753     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
754     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
755     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
756     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
757     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
758     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
759     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
760     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
761     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
762     for (index_t i=0; i < numComp; ++i) {
763 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
764     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
765     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
766 caltinay 3722 } /* end of component loop i */
767     } /* end of k0 loop */
768 caltinay 3711 } /* end of k1 loop */
769 caltinay 3722 } /* end of face 5 */
770     } // end of parallel section
771 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
772 caltinay 3703 } else {
773 caltinay 3707 stringstream msg;
774     msg << "setToGradient() not implemented for "
775     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
776     throw RipleyException(msg.str());
777 caltinay 3703 }
778     }
779    
780 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
781     {
782     escript::Data& in = *const_cast<escript::Data*>(&arg);
783     const dim_t numComp = in.getDataPointSize();
784     const double h0 = m_l0/m_gNE0;
785     const double h1 = m_l1/m_gNE1;
786     const double h2 = m_l2/m_gNE2;
787     if (arg.getFunctionSpace().getTypeCode() == Elements) {
788     const double w_0 = h0*h1*h2/8.;
789     #pragma omp parallel
790     {
791     vector<double> int_local(numComp, 0);
792 caltinay 3722 #pragma omp for nowait
793 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
794     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
795     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
796     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
797     for (index_t i=0; i < numComp; ++i) {
798     const register double f_0 = f[INDEX2(i,0,numComp)];
799     const register double f_1 = f[INDEX2(i,1,numComp)];
800     const register double f_2 = f[INDEX2(i,2,numComp)];
801     const register double f_3 = f[INDEX2(i,3,numComp)];
802     const register double f_4 = f[INDEX2(i,4,numComp)];
803     const register double f_5 = f[INDEX2(i,5,numComp)];
804     const register double f_6 = f[INDEX2(i,6,numComp)];
805     const register double f_7 = f[INDEX2(i,7,numComp)];
806     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
807     } /* end of component loop i */
808     } /* end of k0 loop */
809     } /* end of k1 loop */
810     } /* end of k2 loop */
811    
812     #pragma omp critical
813     for (index_t i=0; i<numComp; i++)
814     integrals[i]+=int_local[i];
815 caltinay 3722 } // end of parallel section
816 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
817     const double w_0 = h0*h1*h2;
818     #pragma omp parallel
819     {
820     vector<double> int_local(numComp, 0);
821 caltinay 3722 #pragma omp for nowait
822 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
823     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
824     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
825     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
826     for (index_t i=0; i < numComp; ++i) {
827     int_local[i]+=f[i]*w_0;
828     } /* end of component loop i */
829     } /* end of k0 loop */
830     } /* end of k1 loop */
831     } /* end of k2 loop */
832    
833     #pragma omp critical
834     for (index_t i=0; i<numComp; i++)
835     integrals[i]+=int_local[i];
836 caltinay 3722 } // end of parallel section
837 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
838     const double w_0 = h1*h2/4.;
839     const double w_1 = h0*h2/4.;
840     const double w_2 = h0*h1/4.;
841     #pragma omp parallel
842     {
843     vector<double> int_local(numComp, 0);
844     if (m_faceOffset[0] > -1) {
845 caltinay 3722 #pragma omp for nowait
846 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
847     for (index_t k1=0; k1 < m_NE1; ++k1) {
848     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
849     for (index_t i=0; i < numComp; ++i) {
850     const register double f_0 = f[INDEX2(i,0,numComp)];
851     const register double f_1 = f[INDEX2(i,1,numComp)];
852     const register double f_2 = f[INDEX2(i,2,numComp)];
853     const register double f_3 = f[INDEX2(i,3,numComp)];
854     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
855     } /* end of component loop i */
856     } /* end of k1 loop */
857     } /* end of k2 loop */
858     }
859    
860     if (m_faceOffset[1] > -1) {
861 caltinay 3722 #pragma omp for nowait
862 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
863     for (index_t k1=0; k1 < m_NE1; ++k1) {
864     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
865     for (index_t i=0; i < numComp; ++i) {
866     const register double f_0 = f[INDEX2(i,0,numComp)];
867     const register double f_1 = f[INDEX2(i,1,numComp)];
868     const register double f_2 = f[INDEX2(i,2,numComp)];
869     const register double f_3 = f[INDEX2(i,3,numComp)];
870     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
871     } /* end of component loop i */
872     } /* end of k1 loop */
873     } /* end of k2 loop */
874     }
875    
876     if (m_faceOffset[2] > -1) {
877 caltinay 3722 #pragma omp for nowait
878 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
879     for (index_t k0=0; k0 < m_NE0; ++k0) {
880     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
881     for (index_t i=0; i < numComp; ++i) {
882     const register double f_0 = f[INDEX2(i,0,numComp)];
883     const register double f_1 = f[INDEX2(i,1,numComp)];
884     const register double f_2 = f[INDEX2(i,2,numComp)];
885     const register double f_3 = f[INDEX2(i,3,numComp)];
886     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
887     } /* end of component loop i */
888     } /* end of k1 loop */
889     } /* end of k2 loop */
890     }
891    
892     if (m_faceOffset[3] > -1) {
893 caltinay 3722 #pragma omp for nowait
894 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
895     for (index_t k0=0; k0 < m_NE0; ++k0) {
896     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
897     for (index_t i=0; i < numComp; ++i) {
898     const register double f_0 = f[INDEX2(i,0,numComp)];
899     const register double f_1 = f[INDEX2(i,1,numComp)];
900     const register double f_2 = f[INDEX2(i,2,numComp)];
901     const register double f_3 = f[INDEX2(i,3,numComp)];
902     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
903     } /* end of component loop i */
904     } /* end of k1 loop */
905     } /* end of k2 loop */
906     }
907    
908     if (m_faceOffset[4] > -1) {
909 caltinay 3722 #pragma omp for nowait
910 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
911     for (index_t k0=0; k0 < m_NE0; ++k0) {
912     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
913     for (index_t i=0; i < numComp; ++i) {
914     const register double f_0 = f[INDEX2(i,0,numComp)];
915     const register double f_1 = f[INDEX2(i,1,numComp)];
916     const register double f_2 = f[INDEX2(i,2,numComp)];
917     const register double f_3 = f[INDEX2(i,3,numComp)];
918     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
919     } /* end of component loop i */
920     } /* end of k1 loop */
921     } /* end of k2 loop */
922     }
923    
924     if (m_faceOffset[5] > -1) {
925 caltinay 3722 #pragma omp for nowait
926 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
927     for (index_t k0=0; k0 < m_NE0; ++k0) {
928     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
929     for (index_t i=0; i < numComp; ++i) {
930     const register double f_0 = f[INDEX2(i,0,numComp)];
931     const register double f_1 = f[INDEX2(i,1,numComp)];
932     const register double f_2 = f[INDEX2(i,2,numComp)];
933     const register double f_3 = f[INDEX2(i,3,numComp)];
934     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
935     } /* end of component loop i */
936     } /* end of k1 loop */
937     } /* end of k2 loop */
938     }
939    
940     #pragma omp critical
941     for (index_t i=0; i<numComp; i++)
942     integrals[i]+=int_local[i];
943 caltinay 3722 } // end of parallel section
944 caltinay 3713
945     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
946     const double w_0 = h1*h2;
947     const double w_1 = h0*h2;
948     const double w_2 = h0*h1;
949     #pragma omp parallel
950     {
951     vector<double> int_local(numComp, 0);
952     if (m_faceOffset[0] > -1) {
953 caltinay 3722 #pragma omp for nowait
954 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
955     for (index_t k1=0; k1 < m_NE1; ++k1) {
956     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
957     for (index_t i=0; i < numComp; ++i) {
958     int_local[i]+=f[i]*w_0;
959     } /* end of component loop i */
960     } /* end of k1 loop */
961     } /* end of k2 loop */
962     }
963    
964     if (m_faceOffset[1] > -1) {
965 caltinay 3722 #pragma omp for nowait
966 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
967     for (index_t k1=0; k1 < m_NE1; ++k1) {
968     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
969     for (index_t i=0; i < numComp; ++i) {
970     int_local[i]+=f[i]*w_0;
971     } /* end of component loop i */
972     } /* end of k1 loop */
973     } /* end of k2 loop */
974     }
975    
976     if (m_faceOffset[2] > -1) {
977 caltinay 3722 #pragma omp for nowait
978 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
979     for (index_t k0=0; k0 < m_NE0; ++k0) {
980     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
981     for (index_t i=0; i < numComp; ++i) {
982     int_local[i]+=f[i]*w_1;
983     } /* end of component loop i */
984     } /* end of k1 loop */
985     } /* end of k2 loop */
986     }
987    
988     if (m_faceOffset[3] > -1) {
989 caltinay 3722 #pragma omp for nowait
990 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
991     for (index_t k0=0; k0 < m_NE0; ++k0) {
992     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
993     for (index_t i=0; i < numComp; ++i) {
994     int_local[i]+=f[i]*w_1;
995     } /* end of component loop i */
996     } /* end of k1 loop */
997     } /* end of k2 loop */
998     }
999    
1000     if (m_faceOffset[4] > -1) {
1001 caltinay 3722 #pragma omp for nowait
1002 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
1003     for (index_t k0=0; k0 < m_NE0; ++k0) {
1004     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1005     for (index_t i=0; i < numComp; ++i) {
1006     int_local[i]+=f[i]*w_2;
1007     } /* end of component loop i */
1008     } /* end of k1 loop */
1009     } /* end of k2 loop */
1010     }
1011    
1012     if (m_faceOffset[5] > -1) {
1013 caltinay 3722 #pragma omp for nowait
1014 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
1015     for (index_t k0=0; k0 < m_NE0; ++k0) {
1016     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1017     for (index_t i=0; i < numComp; ++i) {
1018     int_local[i]+=f[i]*w_2;
1019     } /* end of component loop i */
1020     } /* end of k1 loop */
1021     } /* end of k2 loop */
1022     }
1023    
1024     #pragma omp critical
1025     for (index_t i=0; i<numComp; i++)
1026     integrals[i]+=int_local[i];
1027 caltinay 3722 } // end of parallel section
1028 caltinay 3713
1029     } else {
1030     stringstream msg;
1031     msg << "setToIntegrals() not implemented for "
1032     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1033     throw RipleyException(msg.str());
1034     }
1035     }
1036    
1037 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
1038     {
1039     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1040     #pragma omp parallel
1041     {
1042     if (m_faceOffset[0] > -1) {
1043     #pragma omp for nowait
1044     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1045     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1046     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1047     // set vector at four quadrature points
1048     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1049     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1050     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1051     *o++ = -1.; *o++ = 0.; *o = 0.;
1052     }
1053     }
1054     }
1055    
1056     if (m_faceOffset[1] > -1) {
1057     #pragma omp for nowait
1058     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1059     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1060     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1061     // set vector at four quadrature points
1062     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1063     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1064     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1065     *o++ = 1.; *o++ = 0.; *o = 0.;
1066     }
1067     }
1068     }
1069    
1070     if (m_faceOffset[2] > -1) {
1071     #pragma omp for nowait
1072     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1073     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1074     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1075     // set vector at four quadrature points
1076     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1077     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1078     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1079     *o++ = 0.; *o++ = -1.; *o = 0.;
1080     }
1081     }
1082     }
1083    
1084     if (m_faceOffset[3] > -1) {
1085     #pragma omp for nowait
1086     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1087     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1088     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1089     // set vector at four quadrature points
1090     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1091     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1092     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1093     *o++ = 0.; *o++ = 1.; *o = 0.;
1094     }
1095     }
1096     }
1097    
1098     if (m_faceOffset[4] > -1) {
1099     #pragma omp for nowait
1100     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1101     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1102     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1103     // set vector at four quadrature points
1104     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1105     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1106     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1107     *o++ = 0.; *o++ = 0.; *o = -1.;
1108     }
1109     }
1110     }
1111    
1112     if (m_faceOffset[5] > -1) {
1113     #pragma omp for nowait
1114     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1115     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1116     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1117     // set vector at four quadrature points
1118     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1119     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1120     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1121     *o++ = 0.; *o++ = 0.; *o = 1.;
1122     }
1123     }
1124     }
1125     } // end of parallel section
1126     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1127     #pragma omp parallel
1128     {
1129     if (m_faceOffset[0] > -1) {
1130     #pragma omp for nowait
1131     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1132     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1133     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1134     *o++ = -1.;
1135     *o++ = 0.;
1136     *o = 0.;
1137     }
1138     }
1139     }
1140    
1141     if (m_faceOffset[1] > -1) {
1142     #pragma omp for nowait
1143     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1144     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1145     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1146     *o++ = 1.;
1147     *o++ = 0.;
1148     *o = 0.;
1149     }
1150     }
1151     }
1152    
1153     if (m_faceOffset[2] > -1) {
1154     #pragma omp for nowait
1155     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1156     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1157     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1158     *o++ = 0.;
1159     *o++ = -1.;
1160     *o = 0.;
1161     }
1162     }
1163     }
1164    
1165     if (m_faceOffset[3] > -1) {
1166     #pragma omp for nowait
1167     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1168     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1169     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1170     *o++ = 0.;
1171     *o++ = 1.;
1172     *o = 0.;
1173     }
1174     }
1175     }
1176    
1177     if (m_faceOffset[4] > -1) {
1178     #pragma omp for nowait
1179     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1180     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1181     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1182     *o++ = 0.;
1183     *o++ = 0.;
1184     *o = -1.;
1185     }
1186     }
1187     }
1188    
1189     if (m_faceOffset[5] > -1) {
1190     #pragma omp for nowait
1191     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1192     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1193     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1194     *o++ = 0.;
1195     *o++ = 0.;
1196     *o = 1.;
1197     }
1198     }
1199     }
1200     } // end of parallel section
1201    
1202     } else {
1203     stringstream msg;
1204     msg << "setToNormal() not implemented for "
1205     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1206     throw RipleyException(msg.str());
1207     }
1208     }
1209    
1210 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1211     bool reducedColOrder) const
1212     {
1213     if (reducedRowOrder || reducedColOrder)
1214     throw RipleyException("getPattern() not implemented for reduced order");
1215    
1216 caltinay 3756 return m_pattern;
1217     }
1218    
1219     void Brick::Print_Mesh_Info(const bool full) const
1220     {
1221     RipleyDomain::Print_Mesh_Info(full);
1222     if (full) {
1223     cout << " Id Coordinates" << endl;
1224     cout.precision(15);
1225     cout.setf(ios::scientific, ios::floatfield);
1226     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1227     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1228     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1229     for (index_t i=0; i < getNumNodes(); i++) {
1230     cout << " " << setw(5) << m_nodeId[i]
1231     << " " << xdx.first+(i%m_N0)*xdx.second
1232     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1233     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1234     }
1235     }
1236     }
1237    
1238     IndexVector Brick::getNumNodesPerDim() const
1239     {
1240     IndexVector ret;
1241     ret.push_back(m_N0);
1242     ret.push_back(m_N1);
1243     ret.push_back(m_N2);
1244     return ret;
1245     }
1246    
1247     IndexVector Brick::getNumElementsPerDim() const
1248     {
1249     IndexVector ret;
1250     ret.push_back(m_NE0);
1251     ret.push_back(m_NE1);
1252     ret.push_back(m_NE2);
1253     return ret;
1254     }
1255    
1256     IndexVector Brick::getNumFacesPerBoundary() const
1257     {
1258     IndexVector ret(6, 0);
1259     //left
1260     if (m_offset0==0)
1261     ret[0]=m_NE1*m_NE2;
1262     //right
1263     if (m_mpiInfo->rank%m_NX==m_NX-1)
1264     ret[1]=m_NE1*m_NE2;
1265     //bottom
1266     if (m_offset1==0)
1267     ret[2]=m_NE0*m_NE2;
1268     //top
1269     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1270     ret[3]=m_NE0*m_NE2;
1271     //front
1272     if (m_offset2==0)
1273     ret[4]=m_NE0*m_NE1;
1274     //back
1275     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1276     ret[5]=m_NE0*m_NE1;
1277     return ret;
1278     }
1279    
1280     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1281     {
1282     if (dim==0)
1283     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1284     else if (dim==1)
1285     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1286     else if (dim==2)
1287     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1288    
1289     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1290     }
1291    
1292     //protected
1293     dim_t Brick::getNumDOF() const
1294     {
1295     return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1296     }
1297    
1298     //protected
1299     dim_t Brick::getNumFaceElements() const
1300     {
1301     const IndexVector faces = getNumFacesPerBoundary();
1302     dim_t n=0;
1303     for (size_t i=0; i<faces.size(); i++)
1304     n+=faces[i];
1305     return n;
1306     }
1307    
1308     //protected
1309     void Brick::assembleCoordinates(escript::Data& arg) const
1310     {
1311     escriptDataC x = arg.getDataC();
1312     int numDim = m_numDim;
1313     if (!isDataPointShapeEqual(&x, 1, &numDim))
1314     throw RipleyException("setToX: Invalid Data object shape");
1315     if (!numSamplesEqual(&x, 1, getNumNodes()))
1316     throw RipleyException("setToX: Illegal number of samples in Data object");
1317    
1318     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1319     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1320     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1321     arg.requireWrite();
1322     #pragma omp parallel for
1323     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1324     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1325     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1326     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1327     point[0] = xdx.first+i0*xdx.second;
1328     point[1] = ydy.first+i1*ydy.second;
1329     point[2] = zdz.first+i2*zdz.second;
1330     }
1331     }
1332     }
1333     }
1334    
1335     //protected
1336     dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1337     {
1338     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1339     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1340     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1341     const int x=node%nDOF0;
1342     const int y=node%(nDOF0*nDOF1)/nDOF0;
1343     const int z=node/(nDOF0*nDOF1);
1344     int num=0;
1345     // loop through potential neighbours and add to index if positions are
1346     // within bounds
1347     for (int i2=-1; i2<2; i2++) {
1348     for (int i1=-1; i1<2; i1++) {
1349     for (int i0=-1; i0<2; i0++) {
1350     // skip node itself
1351     if (i0==0 && i1==0 && i2==0)
1352     continue;
1353     // location of neighbour node
1354     const int nx=x+i0;
1355     const int ny=y+i1;
1356     const int nz=z+i2;
1357     if (nx>=0 && ny>=0 && nz>=0
1358     && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1359     index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1360     num++;
1361     }
1362     }
1363     }
1364     }
1365    
1366     return num;
1367     }
1368    
1369     //protected
1370     void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1371     {
1372     const dim_t numComp = in.getDataPointSize();
1373     out.requireWrite();
1374    
1375 caltinay 3754 const index_t left = (m_offset0==0 ? 0 : 1);
1376     const index_t bottom = (m_offset1==0 ? 0 : 1);
1377     const index_t front = (m_offset2==0 ? 0 : 1);
1378 caltinay 3756 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1379     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1380     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1381     #pragma omp parallel for
1382     for (index_t i=0; i<nDOF2; i++) {
1383     for (index_t j=0; j<nDOF1; j++) {
1384     for (index_t k=0; k<nDOF0; k++) {
1385     const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1386     const double* src=in.getSampleDataRO(n);
1387     copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1388     }
1389     }
1390     }
1391     }
1392 caltinay 3754
1393 caltinay 3756 //protected
1394     void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1395     {
1396     const dim_t numComp = in.getDataPointSize();
1397     Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1398     in.requireWrite();
1399     Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1400    
1401     const dim_t numDOF = getNumDOF();
1402     out.requireWrite();
1403     const double* buffer = Paso_Coupler_finishCollect(coupler);
1404    
1405     #pragma omp parallel for
1406     for (index_t i=0; i<getNumNodes(); i++) {
1407     const double* src=(m_dofMap[i]<numDOF ?
1408     in.getSampleDataRO(m_dofMap[i])
1409     : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1410     copy(src, src+numComp, out.getSampleDataRW(i));
1411     }
1412     }
1413    
1414     //private
1415     void Brick::populateSampleIds()
1416     {
1417     // identifiers are ordered from left to right, bottom to top, front to back
1418     // globally
1419    
1420     // build node distribution vector first.
1421     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1422     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1423     const dim_t numDOF=getNumDOF();
1424     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1425     m_nodeDistribution[k]=k*numDOF;
1426     }
1427     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1428     m_nodeId.resize(getNumNodes());
1429     m_dofId.resize(numDOF);
1430     m_elementId.resize(getNumElements());
1431     m_faceId.resize(getNumFaceElements());
1432    
1433     #pragma omp parallel
1434     {
1435     #pragma omp for nowait
1436     // nodes
1437     for (dim_t i2=0; i2<m_N2; i2++) {
1438     for (dim_t i1=0; i1<m_N1; i1++) {
1439     for (dim_t i0=0; i0<m_N0; i0++) {
1440     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1441     (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1442     +(m_offset1+i1)*(m_gNE0+1)
1443     +m_offset0+i0;
1444     }
1445     }
1446     }
1447    
1448     // degrees of freedom
1449     #pragma omp for nowait
1450     for (dim_t k=0; k<numDOF; k++)
1451     m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1452    
1453     // elements
1454     #pragma omp for nowait
1455     for (dim_t i2=0; i2<m_NE2; i2++) {
1456     for (dim_t i1=0; i1<m_NE1; i1++) {
1457     for (dim_t i0=0; i0<m_NE0; i0++) {
1458     m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1459     (m_offset2+i2)*m_gNE0*m_gNE1
1460     +(m_offset1+i1)*m_gNE0
1461     +m_offset0+i0;
1462     }
1463     }
1464     }
1465    
1466     // face elements
1467     #pragma omp for
1468     for (dim_t k=0; k<getNumFaceElements(); k++)
1469     m_faceId[k]=k;
1470     } // end parallel section
1471    
1472     m_nodeTags.assign(getNumNodes(), 0);
1473     updateTagsInUse(Nodes);
1474    
1475     m_elementTags.assign(getNumElements(), 0);
1476     updateTagsInUse(Elements);
1477    
1478     // generate face offset vector and set face tags
1479     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1480     const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1481     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1482     m_faceOffset.assign(facesPerEdge.size(), -1);
1483     m_faceTags.clear();
1484     index_t offset=0;
1485     for (size_t i=0; i<facesPerEdge.size(); i++) {
1486     if (facesPerEdge[i]>0) {
1487     m_faceOffset[i]=offset;
1488     offset+=facesPerEdge[i];
1489     m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1490     }
1491     }
1492     setTagMap("left", LEFT);
1493     setTagMap("right", RIGHT);
1494     setTagMap("bottom", BOTTOM);
1495     setTagMap("top", TOP);
1496     setTagMap("front", FRONT);
1497     setTagMap("back", BACK);
1498     updateTagsInUse(FaceElements);
1499     }
1500    
1501     //private
1502     void Brick::createPattern()
1503     {
1504     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1505     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1506     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1507     const index_t left = (m_offset0==0 ? 0 : 1);
1508     const index_t bottom = (m_offset1==0 ? 0 : 1);
1509     const index_t front = (m_offset2==0 ? 0 : 1);
1510    
1511     // populate node->DOF mapping with own degrees of freedom.
1512     // The rest is assigned in the loop further down
1513 caltinay 3754 m_dofMap.assign(getNumNodes(), 0);
1514     #pragma omp parallel for
1515     for (index_t i=front; i<m_N2; i++) {
1516     for (index_t j=bottom; j<m_N1; j++) {
1517     for (index_t k=left; k<m_N0; k++) {
1518     m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1519     }
1520     }
1521     }
1522    
1523     // build list of shared components and neighbours by looping through
1524     // all potential neighbouring ranks and checking if positions are
1525     // within bounds
1526 caltinay 3756 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1527     vector<IndexVector> colIndices(numDOF); // for the couple blocks
1528     RankVector neighbour;
1529     IndexVector offsetInShared(1,0);
1530     IndexVector sendShared, recvShared;
1531     int numShared=0;
1532 caltinay 3754 const int x=m_mpiInfo->rank%m_NX;
1533     const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1534     const int z=m_mpiInfo->rank/(m_NX*m_NY);
1535     for (int i2=-1; i2<2; i2++) {
1536     for (int i1=-1; i1<2; i1++) {
1537     for (int i0=-1; i0<2; i0++) {
1538 caltinay 3756 // skip this rank
1539 caltinay 3754 if (i0==0 && i1==0 && i2==0)
1540     continue;
1541     // location of neighbour rank
1542     const int nx=x+i0;
1543     const int ny=y+i1;
1544     const int nz=z+i2;
1545     if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1546     neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1547     if (i0==0 && i1==0) {
1548     // sharing front or back plane
1549     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1550     for (dim_t i=0; i<nDOF1; i++) {
1551 caltinay 3755 const int firstDOF=(i2==-1 ? i*nDOF0
1552     : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1553     const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1554     : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1555 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1556     sendShared.push_back(firstDOF+j);
1557     recvShared.push_back(numDOF+numShared);
1558     if (j>0) {
1559     if (i>0)
1560     colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1561     colIndices[firstDOF+j-1].push_back(numShared);
1562     if (i<nDOF1-1)
1563     colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1564     }
1565     if (i>0)
1566     colIndices[firstDOF+j-nDOF0].push_back(numShared);
1567     colIndices[firstDOF+j].push_back(numShared);
1568     if (i<nDOF1-1)
1569     colIndices[firstDOF+j+nDOF0].push_back(numShared);
1570     if (j<nDOF0-1) {
1571     if (i>0)
1572     colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1573     colIndices[firstDOF+j+1].push_back(numShared);
1574     if (i<nDOF1-1)
1575     colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1576     }
1577     m_dofMap[firstNode+j]=numDOF+numShared;
1578     }
1579     }
1580     } else if (i0==0 && i2==0) {
1581     // sharing top or bottom plane
1582     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1583     for (dim_t i=0; i<nDOF2; i++) {
1584 caltinay 3755 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1585     : nDOF0*((i+1)*nDOF1-1));
1586     const int firstNode=(i1==-1 ?
1587     left+(i+front)*m_N0*m_N1
1588     : left+m_N0*((i+1+front)*m_N1-1));
1589 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1590     sendShared.push_back(firstDOF+j);
1591     recvShared.push_back(numDOF+numShared);
1592     if (j>0) {
1593     if (i>0)
1594     colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1595     colIndices[firstDOF+j-1].push_back(numShared);
1596     if (i<nDOF2-1)
1597     colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1598     }
1599     if (i>0)
1600     colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1601     colIndices[firstDOF+j].push_back(numShared);
1602     if (i<nDOF2-1)
1603