/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3760 - (hide annotations)
Mon Jan 9 05:21:18 2012 UTC (7 years, 3 months ago) by caltinay
File size: 383491 byte(s)
-implemented addPDEToRHS() and setToSize()
-added a few missing calls to requireWrite()
-added assemblePDESystem() to Rectangle but haven't tested yet

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16 caltinay 3754 #include "paso/SystemMatrix.h"
17 caltinay 3691 }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50 caltinay 3753 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51     throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52 caltinay 3691
53 caltinay 3753 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54     throw RipleyException("Too few elements for the number of ranks");
55    
56     // local number of elements (including overlap)
57     m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58     if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59     m_NE0++;
60     m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62     m_NE1++;
63     m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64     if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65     m_NE2++;
66    
67     // local number of nodes
68 caltinay 3691 m_N0 = m_NE0+1;
69     m_N1 = m_NE1+1;
70     m_N2 = m_NE2+1;
71 caltinay 3753
72 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 caltinay 3753 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74     if (m_offset0 > 0)
75     m_offset0--;
76     m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77     if (m_offset1 > 0)
78     m_offset1--;
79     m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80     if (m_offset2 > 0)
81     m_offset2--;
82    
83 caltinay 3691 populateSampleIds();
84 caltinay 3756 createPattern();
85 caltinay 3691 }
86    
87    
88     Brick::~Brick()
89     {
90     }
91    
92     string Brick::getDescription() const
93     {
94     return "ripley::Brick";
95     }
96    
97     bool Brick::operator==(const AbstractDomain& other) const
98     {
99 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
100     if (o) {
101     return (RipleyDomain::operator==(other) &&
102     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103     && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104     && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105     }
106 caltinay 3691
107     return false;
108     }
109    
110     void Brick::dump(const string& fileName) const
111     {
112     #if USE_SILO
113     string fn(fileName);
114     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115     fn+=".silo";
116     }
117    
118     const int NUM_SILO_FILES = 1;
119     const char* blockDirFmt = "/block%04d";
120     int driver=DB_HDF5;
121     string siloPath;
122     DBfile* dbfile = NULL;
123    
124     #ifdef ESYS_MPI
125     PMPIO_baton_t* baton = NULL;
126     #endif
127    
128     if (m_mpiInfo->size > 1) {
129     #ifdef ESYS_MPI
130     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132     PMPIO_DefaultClose, (void*)&driver);
133     // try the fallback driver in case of error
134     if (!baton && driver != DB_PDB) {
135     driver = DB_PDB;
136     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138     PMPIO_DefaultClose, (void*)&driver);
139     }
140     if (baton) {
141     char str[64];
142     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143     siloPath = str;
144     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145     }
146     #endif
147     } else {
148     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149     getDescription().c_str(), driver);
150     // try the fallback driver in case of error
151     if (!dbfile && driver != DB_PDB) {
152     driver = DB_PDB;
153     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154     getDescription().c_str(), driver);
155     }
156     }
157    
158     if (!dbfile)
159     throw RipleyException("dump: Could not create Silo file");
160    
161     /*
162     if (driver==DB_HDF5) {
163     // gzip level 1 already provides good compression with minimal
164     // performance penalty. Some tests showed that gzip levels >3 performed
165     // rather badly on escript data both in terms of time and space
166     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167     }
168     */
169    
170     boost::scoped_ptr<double> x(new double[m_N0]);
171     boost::scoped_ptr<double> y(new double[m_N1]);
172     boost::scoped_ptr<double> z(new double[m_N2]);
173     double* coords[3] = { x.get(), y.get(), z.get() };
174 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175     pair<double,double> ydy = getFirstCoordAndSpacing(1);
176     pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 caltinay 3691 #pragma omp parallel
178     {
179     #pragma omp for
180     for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
182 caltinay 3691 }
183     #pragma omp for
184     for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
186 caltinay 3691 }
187     #pragma omp for
188     for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
190 caltinay 3691 }
191     }
192 caltinay 3698 IndexVector dims = getNumNodesPerDim();
193     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 caltinay 3691 DB_COLLINEAR, NULL);
195    
196 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197     NULL, 0, DB_INT, DB_NODECENT, NULL);
198 caltinay 3691
199 caltinay 3698 // write element ids
200     dims = getNumElementsPerDim();
201     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203    
204     // rank 0 writes multimesh and multivar
205 caltinay 3691 if (m_mpiInfo->rank == 0) {
206     vector<string> tempstrings;
207     vector<char*> names;
208     for (dim_t i=0; i<m_mpiInfo->size; i++) {
209     stringstream path;
210     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211     tempstrings.push_back(path.str());
212     names.push_back((char*)tempstrings.back().c_str());
213     }
214     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215     DBSetDir(dbfile, "/");
216     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217     &types[0], NULL);
218     tempstrings.clear();
219     names.clear();
220     for (dim_t i=0; i<m_mpiInfo->size; i++) {
221     stringstream path;
222     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223     tempstrings.push_back(path.str());
224     names.push_back((char*)tempstrings.back().c_str());
225     }
226     types.assign(m_mpiInfo->size, DB_QUADVAR);
227     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228     &types[0], NULL);
229 caltinay 3698 tempstrings.clear();
230     names.clear();
231     for (dim_t i=0; i<m_mpiInfo->size; i++) {
232     stringstream path;
233     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234     tempstrings.push_back(path.str());
235     names.push_back((char*)tempstrings.back().c_str());
236     }
237     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238     &types[0], NULL);
239 caltinay 3691 }
240    
241     if (m_mpiInfo->size > 1) {
242     #ifdef ESYS_MPI
243     PMPIO_HandOffBaton(baton, dbfile);
244     PMPIO_Finish(baton);
245     #endif
246     } else {
247     DBClose(dbfile);
248     }
249    
250     #else // USE_SILO
251     throw RipleyException("dump(): no Silo support");
252     #endif
253     }
254    
255     const int* Brick::borrowSampleReferenceIDs(int fsType) const
256     {
257 caltinay 3697 switch (fsType) {
258     case Nodes:
259 caltinay 3748 case ReducedNodes: //FIXME: reduced
260 caltinay 3753 return &m_nodeId[0];
261 caltinay 3757 case DegreesOfFreedom:
262     case ReducedDegreesOfFreedom: //FIXME: reduced
263 caltinay 3753 return &m_dofId[0];
264 caltinay 3697 case Elements:
265 caltinay 3733 case ReducedElements:
266 caltinay 3697 return &m_elementId[0];
267 caltinay 3757 case FaceElements:
268 caltinay 3733 case ReducedFaceElements:
269 caltinay 3697 return &m_faceId[0];
270     default:
271     break;
272     }
273 caltinay 3691
274 caltinay 3697 stringstream msg;
275     msg << "borrowSampleReferenceIDs() not implemented for function space type "
276     << fsType;
277     throw RipleyException(msg.str());
278 caltinay 3691 }
279    
280 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
281 caltinay 3691 {
282 caltinay 3759 if (getMPISize()==1)
283     return true;
284    
285 caltinay 3757 switch (fsType) {
286     case Nodes:
287     case ReducedNodes: //FIXME: reduced
288     return (m_dofMap[id] < getNumDOF());
289     case DegreesOfFreedom:
290     case ReducedDegreesOfFreedom:
291     return true;
292     case Elements:
293     case ReducedElements:
294     {
295     // check ownership of element's _last_ node
296     const index_t x=id%m_NE0 + 1;
297     const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
298     const index_t z=id/(m_NE0*m_NE1) + 1;
299     return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
300     }
301     case FaceElements:
302     case ReducedFaceElements:
303 caltinay 3759 {
304     // check ownership of face element's last node
305     const IndexVector faces = getNumFacesPerBoundary();
306     dim_t n=0;
307     for (size_t i=0; i<faces.size(); i++) {
308     n+=faces[i];
309     if (id<n) {
310     const index_t j=id-n+faces[i];
311     if (i>=4) { // front or back
312     const index_t first=(i==4 ? 0 : m_N0*m_N1*(m_N2-1));
313     return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0] < getNumDOF());
314     } else if (i>=2) { // bottom or top
315     const index_t first=(i==2 ? 0 : m_N0*(m_N1-1));
316     return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0*m_N1] < getNumDOF());
317     } else { // left or right
318     const index_t first=(i==0 ? 0 : m_N0-1);
319     return (m_dofMap[first+(j%m_NE1+1)*m_N0+(j/m_NE1+1)*m_N0*m_N1] < getNumDOF());
320     }
321     }
322     }
323     return false;
324     }
325 caltinay 3757 default:
326     break;
327 caltinay 3753 }
328 caltinay 3757
329     stringstream msg;
330     msg << "ownSample() not implemented for "
331     << functionSpaceTypeAsString(fsType);
332     throw RipleyException(msg.str());
333 caltinay 3691 }
334    
335 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
336     {
337     escript::Data& in = *const_cast<escript::Data*>(&cIn);
338     const dim_t numComp = in.getDataPointSize();
339     const double h0 = m_l0/m_gNE0;
340     const double h1 = m_l1/m_gNE1;
341     const double h2 = m_l1/m_gNE2;
342 caltinay 3731 const double C0 = .044658198738520451079;
343     const double C1 = .16666666666666666667;
344     const double C2 = .21132486540518711775;
345     const double C3 = .25;
346     const double C4 = .5;
347     const double C5 = .62200846792814621559;
348     const double C6 = .78867513459481288225;
349    
350 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
351 caltinay 3760 out.requireWrite();
352 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
353 caltinay 3703 #pragma omp parallel for
354 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
355     for (index_t k1=0; k1 < m_NE1; ++k1) {
356     for (index_t k0=0; k0 < m_NE0; ++k0) {
357 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
358     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
359     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
360     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
361     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
362     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
363     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
364     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
365     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
366     for (index_t i=0; i < numComp; ++i) {
367 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
368     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
369     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
370     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
371     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
372     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
373     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
374     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
375     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
376     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
377     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
378     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
379     o[INDEX3(i,0,0,numComp,3)] = V0;
380     o[INDEX3(i,1,0,numComp,3)] = V4;
381     o[INDEX3(i,2,0,numComp,3)] = V8;
382     o[INDEX3(i,0,1,numComp,3)] = V0;
383     o[INDEX3(i,1,1,numComp,3)] = V5;
384     o[INDEX3(i,2,1,numComp,3)] = V9;
385     o[INDEX3(i,0,2,numComp,3)] = V1;
386     o[INDEX3(i,1,2,numComp,3)] = V4;
387     o[INDEX3(i,2,2,numComp,3)] = V10;
388     o[INDEX3(i,0,3,numComp,3)] = V1;
389     o[INDEX3(i,1,3,numComp,3)] = V5;
390     o[INDEX3(i,2,3,numComp,3)] = V11;
391     o[INDEX3(i,0,4,numComp,3)] = V2;
392     o[INDEX3(i,1,4,numComp,3)] = V6;
393     o[INDEX3(i,2,4,numComp,3)] = V8;
394     o[INDEX3(i,0,5,numComp,3)] = V2;
395     o[INDEX3(i,1,5,numComp,3)] = V7;
396     o[INDEX3(i,2,5,numComp,3)] = V9;
397     o[INDEX3(i,0,6,numComp,3)] = V3;
398     o[INDEX3(i,1,6,numComp,3)] = V6;
399     o[INDEX3(i,2,6,numComp,3)] = V10;
400     o[INDEX3(i,0,7,numComp,3)] = V3;
401     o[INDEX3(i,1,7,numComp,3)] = V7;
402     o[INDEX3(i,2,7,numComp,3)] = V11;
403 caltinay 3703 } /* end of component loop i */
404     } /* end of k0 loop */
405     } /* end of k1 loop */
406     } /* end of k2 loop */
407     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
408 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
409 caltinay 3760 out.requireWrite();
410 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
411 caltinay 3711 #pragma omp parallel for
412     for (index_t k2=0; k2 < m_NE2; ++k2) {
413     for (index_t k1=0; k1 < m_NE1; ++k1) {
414     for (index_t k0=0; k0 < m_NE0; ++k0) {
415     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
416     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
417     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
418     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
419     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
420     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
421     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
422     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
423     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
424     for (index_t i=0; i < numComp; ++i) {
425 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
426     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
427     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
428 caltinay 3711 } /* end of component loop i */
429     } /* end of k0 loop */
430     } /* end of k1 loop */
431     } /* end of k2 loop */
432     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
433 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
434 caltinay 3760 out.requireWrite();
435 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
436 caltinay 3722 #pragma omp parallel
437     {
438     if (m_faceOffset[0] > -1) {
439     #pragma omp for nowait
440     for (index_t k2=0; k2 < m_NE2; ++k2) {
441     for (index_t k1=0; k1 < m_NE1; ++k1) {
442     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
443     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
444     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
445     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
446     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
447     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
448     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
449     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
450     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
451     for (index_t i=0; i < numComp; ++i) {
452 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
453     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
454     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
455     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
456     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457     o[INDEX3(i,1,0,numComp,3)] = V0;
458     o[INDEX3(i,2,0,numComp,3)] = V2;
459     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460     o[INDEX3(i,1,1,numComp,3)] = V0;
461     o[INDEX3(i,2,1,numComp,3)] = V3;
462     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463     o[INDEX3(i,1,2,numComp,3)] = V1;
464     o[INDEX3(i,2,2,numComp,3)] = V2;
465     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466     o[INDEX3(i,1,3,numComp,3)] = V1;
467     o[INDEX3(i,2,3,numComp,3)] = V3;
468 caltinay 3722 } /* end of component loop i */
469     } /* end of k1 loop */
470     } /* end of k2 loop */
471     } /* end of face 0 */
472     if (m_faceOffset[1] > -1) {
473     #pragma omp for nowait
474     for (index_t k2=0; k2 < m_NE2; ++k2) {
475     for (index_t k1=0; k1 < m_NE1; ++k1) {
476     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
477     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
478     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
479     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
480     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
481     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
482     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
483     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
484     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
485     for (index_t i=0; i < numComp; ++i) {
486 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
487     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
488     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
489     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
490     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
491     o[INDEX3(i,1,0,numComp,3)] = V0;
492     o[INDEX3(i,2,0,numComp,3)] = V2;
493     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
494     o[INDEX3(i,1,1,numComp,3)] = V0;
495     o[INDEX3(i,2,1,numComp,3)] = V3;
496     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
497     o[INDEX3(i,1,2,numComp,3)] = V1;
498     o[INDEX3(i,2,2,numComp,3)] = V2;
499     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
500     o[INDEX3(i,1,3,numComp,3)] = V1;
501     o[INDEX3(i,2,3,numComp,3)] = V3;
502 caltinay 3722 } /* end of component loop i */
503     } /* end of k1 loop */
504     } /* end of k2 loop */
505     } /* end of face 1 */
506     if (m_faceOffset[2] > -1) {
507     #pragma omp for nowait
508     for (index_t k2=0; k2 < m_NE2; ++k2) {
509     for (index_t k0=0; k0 < m_NE0; ++k0) {
510     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
511     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
512     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
513     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
514     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
515     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
516     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
517     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
518     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
519     for (index_t i=0; i < numComp; ++i) {
520 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
521     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
522     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
523     o[INDEX3(i,0,0,numComp,3)] = V0;
524     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525     o[INDEX3(i,2,0,numComp,3)] = V1;
526     o[INDEX3(i,0,1,numComp,3)] = V0;
527     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528     o[INDEX3(i,2,1,numComp,3)] = V2;
529     o[INDEX3(i,0,2,numComp,3)] = V0;
530     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531     o[INDEX3(i,2,2,numComp,3)] = V1;
532     o[INDEX3(i,0,3,numComp,3)] = V0;
533     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534     o[INDEX3(i,2,3,numComp,3)] = V2;
535 caltinay 3722 } /* end of component loop i */
536     } /* end of k0 loop */
537     } /* end of k2 loop */
538     } /* end of face 2 */
539     if (m_faceOffset[3] > -1) {
540     #pragma omp for nowait
541     for (index_t k2=0; k2 < m_NE2; ++k2) {
542     for (index_t k0=0; k0 < m_NE0; ++k0) {
543     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
544     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
545     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
546     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
547     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
548     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
549     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
550     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
551     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
552     for (index_t i=0; i < numComp; ++i) {
553 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
554     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
555     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
556     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
557     o[INDEX3(i,0,0,numComp,3)] = V0;
558     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
559     o[INDEX3(i,2,0,numComp,3)] = V2;
560     o[INDEX3(i,0,1,numComp,3)] = V0;
561     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
562     o[INDEX3(i,2,1,numComp,3)] = V3;
563     o[INDEX3(i,0,2,numComp,3)] = V1;
564     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
565     o[INDEX3(i,2,2,numComp,3)] = V2;
566     o[INDEX3(i,0,3,numComp,3)] = V1;
567     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
568     o[INDEX3(i,2,3,numComp,3)] = V3;
569 caltinay 3722 } /* end of component loop i */
570     } /* end of k0 loop */
571     } /* end of k2 loop */
572     } /* end of face 3 */
573     if (m_faceOffset[4] > -1) {
574     #pragma omp for nowait
575 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
577     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
578     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
579     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
580     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
581     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
582     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
583     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
584     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
585     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
586     for (index_t i=0; i < numComp; ++i) {
587 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
588     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
589     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
590     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
591     o[INDEX3(i,0,0,numComp,3)] = V0;
592     o[INDEX3(i,1,0,numComp,3)] = V2;
593     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594     o[INDEX3(i,0,1,numComp,3)] = V0;
595     o[INDEX3(i,1,1,numComp,3)] = V3;
596     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597     o[INDEX3(i,0,2,numComp,3)] = V1;
598     o[INDEX3(i,1,2,numComp,3)] = V2;
599     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600     o[INDEX3(i,0,3,numComp,3)] = V1;
601     o[INDEX3(i,1,3,numComp,3)] = V3;
602     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 caltinay 3722 } /* end of component loop i */
604     } /* end of k0 loop */
605 caltinay 3707 } /* end of k1 loop */
606 caltinay 3722 } /* end of face 4 */
607     if (m_faceOffset[5] > -1) {
608     #pragma omp for nowait
609 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
610 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
611     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
612     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
613     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
614     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
615     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
616     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
617     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
618     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
619     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
620     for (index_t i=0; i < numComp; ++i) {
621 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
622     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
623     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
624     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
625     o[INDEX3(i,0,0,numComp,3)] = V0;
626     o[INDEX3(i,1,0,numComp,3)] = V2;
627     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
628     o[INDEX3(i,0,1,numComp,3)] = V0;
629     o[INDEX3(i,1,1,numComp,3)] = V3;
630     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
631     o[INDEX3(i,0,2,numComp,3)] = V1;
632     o[INDEX3(i,1,2,numComp,3)] = V2;
633     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
634     o[INDEX3(i,0,3,numComp,3)] = V1;
635     o[INDEX3(i,1,3,numComp,3)] = V3;
636     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
637 caltinay 3722 } /* end of component loop i */
638     } /* end of k0 loop */
639 caltinay 3707 } /* end of k1 loop */
640 caltinay 3722 } /* end of face 5 */
641     } // end of parallel section
642 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
643 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
644 caltinay 3760 out.requireWrite();
645 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
646 caltinay 3722 #pragma omp parallel
647     {
648     if (m_faceOffset[0] > -1) {
649     #pragma omp for nowait
650     for (index_t k2=0; k2 < m_NE2; ++k2) {
651     for (index_t k1=0; k1 < m_NE1; ++k1) {
652     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
653     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
654     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
655     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
656     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
657     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
658     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
659     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
660     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
661     for (index_t i=0; i < numComp; ++i) {
662 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
663     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
664     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
665 caltinay 3722 } /* end of component loop i */
666     } /* end of k1 loop */
667     } /* end of k2 loop */
668     } /* end of face 0 */
669     if (m_faceOffset[1] > -1) {
670     #pragma omp for nowait
671     for (index_t k2=0; k2 < m_NE2; ++k2) {
672     for (index_t k1=0; k1 < m_NE1; ++k1) {
673     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
674     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
675     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
676     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
677     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
678     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
679     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
680     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
681     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
682     for (index_t i=0; i < numComp; ++i) {
683 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
684     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
685     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
686 caltinay 3722 } /* end of component loop i */
687     } /* end of k1 loop */
688     } /* end of k2 loop */
689     } /* end of face 1 */
690     if (m_faceOffset[2] > -1) {
691     #pragma omp for nowait
692     for (index_t k2=0; k2 < m_NE2; ++k2) {
693     for (index_t k0=0; k0 < m_NE0; ++k0) {
694     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
695     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
696     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
697     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
698     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
699     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
700     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
701     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
702     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
703     for (index_t i=0; i < numComp; ++i) {
704 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
705     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
706     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
707 caltinay 3722 } /* end of component loop i */
708     } /* end of k0 loop */
709     } /* end of k2 loop */
710     } /* end of face 2 */
711     if (m_faceOffset[3] > -1) {
712     #pragma omp for nowait
713     for (index_t k2=0; k2 < m_NE2; ++k2) {
714     for (index_t k0=0; k0 < m_NE0; ++k0) {
715     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
716     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
717     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
718     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
719     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
720     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
721     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
722     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
723     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
724     for (index_t i=0; i < numComp; ++i) {
725 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
726     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
727     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
728 caltinay 3722 } /* end of component loop i */
729     } /* end of k0 loop */
730     } /* end of k2 loop */
731     } /* end of face 3 */
732     if (m_faceOffset[4] > -1) {
733     #pragma omp for nowait
734 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
735 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
736     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
737     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
738     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
739     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
740     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
741     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
742     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
743     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
744     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
745     for (index_t i=0; i < numComp; ++i) {
746 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
747     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
748     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
749 caltinay 3722 } /* end of component loop i */
750     } /* end of k0 loop */
751 caltinay 3711 } /* end of k1 loop */
752 caltinay 3722 } /* end of face 4 */
753     if (m_faceOffset[5] > -1) {
754     #pragma omp for nowait
755 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
756 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
757     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
758     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
759     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
760     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
761     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
762     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
763     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
764     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
765     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
766     for (index_t i=0; i < numComp; ++i) {
767 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
768     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
769     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
770 caltinay 3722 } /* end of component loop i */
771     } /* end of k0 loop */
772 caltinay 3711 } /* end of k1 loop */
773 caltinay 3722 } /* end of face 5 */
774     } // end of parallel section
775 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
776 caltinay 3703 } else {
777 caltinay 3707 stringstream msg;
778     msg << "setToGradient() not implemented for "
779     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
780     throw RipleyException(msg.str());
781 caltinay 3703 }
782     }
783    
784 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
785     {
786     escript::Data& in = *const_cast<escript::Data*>(&arg);
787     const dim_t numComp = in.getDataPointSize();
788     const double h0 = m_l0/m_gNE0;
789     const double h1 = m_l1/m_gNE1;
790     const double h2 = m_l2/m_gNE2;
791     if (arg.getFunctionSpace().getTypeCode() == Elements) {
792     const double w_0 = h0*h1*h2/8.;
793     #pragma omp parallel
794     {
795     vector<double> int_local(numComp, 0);
796 caltinay 3722 #pragma omp for nowait
797 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
798     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
799     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
800     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
801     for (index_t i=0; i < numComp; ++i) {
802     const register double f_0 = f[INDEX2(i,0,numComp)];
803     const register double f_1 = f[INDEX2(i,1,numComp)];
804     const register double f_2 = f[INDEX2(i,2,numComp)];
805     const register double f_3 = f[INDEX2(i,3,numComp)];
806     const register double f_4 = f[INDEX2(i,4,numComp)];
807     const register double f_5 = f[INDEX2(i,5,numComp)];
808     const register double f_6 = f[INDEX2(i,6,numComp)];
809     const register double f_7 = f[INDEX2(i,7,numComp)];
810     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
811     } /* end of component loop i */
812     } /* end of k0 loop */
813     } /* end of k1 loop */
814     } /* end of k2 loop */
815    
816     #pragma omp critical
817     for (index_t i=0; i<numComp; i++)
818     integrals[i]+=int_local[i];
819 caltinay 3722 } // end of parallel section
820 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
821     const double w_0 = h0*h1*h2;
822     #pragma omp parallel
823     {
824     vector<double> int_local(numComp, 0);
825 caltinay 3722 #pragma omp for nowait
826 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
827     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
828     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
829     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
830     for (index_t i=0; i < numComp; ++i) {
831     int_local[i]+=f[i]*w_0;
832     } /* end of component loop i */
833     } /* end of k0 loop */
834     } /* end of k1 loop */
835     } /* end of k2 loop */
836    
837     #pragma omp critical
838     for (index_t i=0; i<numComp; i++)
839     integrals[i]+=int_local[i];
840 caltinay 3722 } // end of parallel section
841 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
842     const double w_0 = h1*h2/4.;
843     const double w_1 = h0*h2/4.;
844     const double w_2 = h0*h1/4.;
845     #pragma omp parallel
846     {
847     vector<double> int_local(numComp, 0);
848     if (m_faceOffset[0] > -1) {
849 caltinay 3722 #pragma omp for nowait
850 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
851     for (index_t k1=0; k1 < m_NE1; ++k1) {
852     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
853     for (index_t i=0; i < numComp; ++i) {
854     const register double f_0 = f[INDEX2(i,0,numComp)];
855     const register double f_1 = f[INDEX2(i,1,numComp)];
856     const register double f_2 = f[INDEX2(i,2,numComp)];
857     const register double f_3 = f[INDEX2(i,3,numComp)];
858     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
859     } /* end of component loop i */
860     } /* end of k1 loop */
861     } /* end of k2 loop */
862     }
863    
864     if (m_faceOffset[1] > -1) {
865 caltinay 3722 #pragma omp for nowait
866 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
867     for (index_t k1=0; k1 < m_NE1; ++k1) {
868     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
869     for (index_t i=0; i < numComp; ++i) {
870     const register double f_0 = f[INDEX2(i,0,numComp)];
871     const register double f_1 = f[INDEX2(i,1,numComp)];
872     const register double f_2 = f[INDEX2(i,2,numComp)];
873     const register double f_3 = f[INDEX2(i,3,numComp)];
874     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
875     } /* end of component loop i */
876     } /* end of k1 loop */
877     } /* end of k2 loop */
878     }
879    
880     if (m_faceOffset[2] > -1) {
881 caltinay 3722 #pragma omp for nowait
882 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
883     for (index_t k0=0; k0 < m_NE0; ++k0) {
884     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
885     for (index_t i=0; i < numComp; ++i) {
886     const register double f_0 = f[INDEX2(i,0,numComp)];
887     const register double f_1 = f[INDEX2(i,1,numComp)];
888     const register double f_2 = f[INDEX2(i,2,numComp)];
889     const register double f_3 = f[INDEX2(i,3,numComp)];
890     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
891     } /* end of component loop i */
892     } /* end of k1 loop */
893     } /* end of k2 loop */
894     }
895    
896     if (m_faceOffset[3] > -1) {
897 caltinay 3722 #pragma omp for nowait
898 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
899     for (index_t k0=0; k0 < m_NE0; ++k0) {
900     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
901     for (index_t i=0; i < numComp; ++i) {
902     const register double f_0 = f[INDEX2(i,0,numComp)];
903     const register double f_1 = f[INDEX2(i,1,numComp)];
904     const register double f_2 = f[INDEX2(i,2,numComp)];
905     const register double f_3 = f[INDEX2(i,3,numComp)];
906     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
907     } /* end of component loop i */
908     } /* end of k1 loop */
909     } /* end of k2 loop */
910     }
911    
912     if (m_faceOffset[4] > -1) {
913 caltinay 3722 #pragma omp for nowait
914 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
915     for (index_t k0=0; k0 < m_NE0; ++k0) {
916     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
917     for (index_t i=0; i < numComp; ++i) {
918     const register double f_0 = f[INDEX2(i,0,numComp)];
919     const register double f_1 = f[INDEX2(i,1,numComp)];
920     const register double f_2 = f[INDEX2(i,2,numComp)];
921     const register double f_3 = f[INDEX2(i,3,numComp)];
922     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
923     } /* end of component loop i */
924     } /* end of k1 loop */
925     } /* end of k2 loop */
926     }
927    
928     if (m_faceOffset[5] > -1) {
929 caltinay 3722 #pragma omp for nowait
930 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
931     for (index_t k0=0; k0 < m_NE0; ++k0) {
932     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
933     for (index_t i=0; i < numComp; ++i) {
934     const register double f_0 = f[INDEX2(i,0,numComp)];
935     const register double f_1 = f[INDEX2(i,1,numComp)];
936     const register double f_2 = f[INDEX2(i,2,numComp)];
937     const register double f_3 = f[INDEX2(i,3,numComp)];
938     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
939     } /* end of component loop i */
940     } /* end of k1 loop */
941     } /* end of k2 loop */
942     }
943    
944     #pragma omp critical
945     for (index_t i=0; i<numComp; i++)
946     integrals[i]+=int_local[i];
947 caltinay 3722 } // end of parallel section
948 caltinay 3713
949     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
950     const double w_0 = h1*h2;
951     const double w_1 = h0*h2;
952     const double w_2 = h0*h1;
953     #pragma omp parallel
954     {
955     vector<double> int_local(numComp, 0);
956     if (m_faceOffset[0] > -1) {
957 caltinay 3722 #pragma omp for nowait
958 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
959     for (index_t k1=0; k1 < m_NE1; ++k1) {
960     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
961     for (index_t i=0; i < numComp; ++i) {
962     int_local[i]+=f[i]*w_0;
963     } /* end of component loop i */
964     } /* end of k1 loop */
965     } /* end of k2 loop */
966     }
967    
968     if (m_faceOffset[1] > -1) {
969 caltinay 3722 #pragma omp for nowait
970 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
971     for (index_t k1=0; k1 < m_NE1; ++k1) {
972     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
973     for (index_t i=0; i < numComp; ++i) {
974     int_local[i]+=f[i]*w_0;
975     } /* end of component loop i */
976     } /* end of k1 loop */
977     } /* end of k2 loop */
978     }
979    
980     if (m_faceOffset[2] > -1) {
981 caltinay 3722 #pragma omp for nowait
982 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
983     for (index_t k0=0; k0 < m_NE0; ++k0) {
984     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
985     for (index_t i=0; i < numComp; ++i) {
986     int_local[i]+=f[i]*w_1;
987     } /* end of component loop i */
988     } /* end of k1 loop */
989     } /* end of k2 loop */
990     }
991    
992     if (m_faceOffset[3] > -1) {
993 caltinay 3722 #pragma omp for nowait
994 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
995     for (index_t k0=0; k0 < m_NE0; ++k0) {
996     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
997     for (index_t i=0; i < numComp; ++i) {
998     int_local[i]+=f[i]*w_1;
999     } /* end of component loop i */
1000     } /* end of k1 loop */
1001     } /* end of k2 loop */
1002     }
1003    
1004     if (m_faceOffset[4] > -1) {
1005 caltinay 3722 #pragma omp for nowait
1006 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
1007     for (index_t k0=0; k0 < m_NE0; ++k0) {
1008     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1009     for (index_t i=0; i < numComp; ++i) {
1010     int_local[i]+=f[i]*w_2;
1011     } /* end of component loop i */
1012     } /* end of k1 loop */
1013     } /* end of k2 loop */
1014     }
1015    
1016     if (m_faceOffset[5] > -1) {
1017 caltinay 3722 #pragma omp for nowait
1018 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
1019     for (index_t k0=0; k0 < m_NE0; ++k0) {
1020     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1021     for (index_t i=0; i < numComp; ++i) {
1022     int_local[i]+=f[i]*w_2;
1023     } /* end of component loop i */
1024     } /* end of k1 loop */
1025     } /* end of k2 loop */
1026     }
1027    
1028     #pragma omp critical
1029     for (index_t i=0; i<numComp; i++)
1030     integrals[i]+=int_local[i];
1031 caltinay 3722 } // end of parallel section
1032 caltinay 3713
1033     } else {
1034     stringstream msg;
1035     msg << "setToIntegrals() not implemented for "
1036     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1037     throw RipleyException(msg.str());
1038     }
1039     }
1040    
1041 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
1042     {
1043     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1044 caltinay 3760 out.requireWrite();
1045 caltinay 3722 #pragma omp parallel
1046     {
1047     if (m_faceOffset[0] > -1) {
1048     #pragma omp for nowait
1049     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1050     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1051     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1052     // set vector at four quadrature points
1053     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1054     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1055     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1056     *o++ = -1.; *o++ = 0.; *o = 0.;
1057     }
1058     }
1059     }
1060    
1061     if (m_faceOffset[1] > -1) {
1062     #pragma omp for nowait
1063     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1064     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1065     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1066     // set vector at four quadrature points
1067     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1068     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1069     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1070     *o++ = 1.; *o++ = 0.; *o = 0.;
1071     }
1072     }
1073     }
1074    
1075     if (m_faceOffset[2] > -1) {
1076     #pragma omp for nowait
1077     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1078     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1079     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1080     // set vector at four quadrature points
1081     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1082     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1083     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1084     *o++ = 0.; *o++ = -1.; *o = 0.;
1085     }
1086     }
1087     }
1088    
1089     if (m_faceOffset[3] > -1) {
1090     #pragma omp for nowait
1091     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1092     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1093     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1094     // set vector at four quadrature points
1095     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1096     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1097     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1098     *o++ = 0.; *o++ = 1.; *o = 0.;
1099     }
1100     }
1101     }
1102    
1103     if (m_faceOffset[4] > -1) {
1104     #pragma omp for nowait
1105     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1106     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1107     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1108     // set vector at four quadrature points
1109     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1110     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1111     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1112     *o++ = 0.; *o++ = 0.; *o = -1.;
1113     }
1114     }
1115     }
1116    
1117     if (m_faceOffset[5] > -1) {
1118     #pragma omp for nowait
1119     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1120     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1121     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1122     // set vector at four quadrature points
1123     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1124     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1125     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1126     *o++ = 0.; *o++ = 0.; *o = 1.;
1127     }
1128     }
1129     }
1130     } // end of parallel section
1131     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1132 caltinay 3760 out.requireWrite();
1133 caltinay 3722 #pragma omp parallel
1134     {
1135     if (m_faceOffset[0] > -1) {
1136     #pragma omp for nowait
1137     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1138     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1139     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1140     *o++ = -1.;
1141     *o++ = 0.;
1142     *o = 0.;
1143     }
1144     }
1145     }
1146    
1147     if (m_faceOffset[1] > -1) {
1148     #pragma omp for nowait
1149     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1150     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1151     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1152     *o++ = 1.;
1153     *o++ = 0.;
1154     *o = 0.;
1155     }
1156     }
1157     }
1158    
1159     if (m_faceOffset[2] > -1) {
1160     #pragma omp for nowait
1161     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1162     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1163     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1164     *o++ = 0.;
1165     *o++ = -1.;
1166     *o = 0.;
1167     }
1168     }
1169     }
1170    
1171     if (m_faceOffset[3] > -1) {
1172     #pragma omp for nowait
1173     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1174     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1175     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1176     *o++ = 0.;
1177     *o++ = 1.;
1178     *o = 0.;
1179     }
1180     }
1181     }
1182    
1183     if (m_faceOffset[4] > -1) {
1184     #pragma omp for nowait
1185     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1186     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1187     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1188     *o++ = 0.;
1189     *o++ = 0.;
1190     *o = -1.;
1191     }
1192     }
1193     }
1194    
1195     if (m_faceOffset[5] > -1) {
1196     #pragma omp for nowait
1197     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1198     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1199     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1200     *o++ = 0.;
1201     *o++ = 0.;
1202     *o = 1.;
1203     }
1204     }
1205     }
1206     } // end of parallel section
1207    
1208     } else {
1209     stringstream msg;
1210     msg << "setToNormal() not implemented for "
1211     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1212     throw RipleyException(msg.str());
1213     }
1214     }
1215    
1216 caltinay 3760 void Brick::setToSize(escript::Data& out) const
1217     {
1218     if (out.getFunctionSpace().getTypeCode() == Elements
1219     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1220     out.requireWrite();
1221     const dim_t numQuad=out.getNumDataPointsPerSample();
1222     const double xSize=getFirstCoordAndSpacing(0).second;
1223     const double ySize=getFirstCoordAndSpacing(1).second;
1224     const double zSize=getFirstCoordAndSpacing(2).second;
1225     const double size=min(min(xSize,ySize),zSize);
1226     #pragma omp parallel for
1227     for (index_t k = 0; k < getNumElements(); ++k) {
1228     double* o = out.getSampleDataRW(k);
1229     fill(o, o+numQuad, size);
1230     }
1231     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1232     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1233     out.requireWrite();
1234     const dim_t numQuad=out.getNumDataPointsPerSample();
1235     const double xSize=getFirstCoordAndSpacing(0).second;
1236     const double ySize=getFirstCoordAndSpacing(1).second;
1237     const double zSize=getFirstCoordAndSpacing(2).second;
1238     #pragma omp parallel
1239     {
1240     if (m_faceOffset[0] > -1) {
1241     const double size=min(ySize,zSize);
1242     #pragma omp for nowait
1243     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1244     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1245     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1246     fill(o, o+numQuad, size);
1247     }
1248     }
1249     }
1250    
1251     if (m_faceOffset[1] > -1) {
1252     const double size=min(ySize,zSize);
1253     #pragma omp for nowait
1254     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1255     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1256     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1257     fill(o, o+numQuad, size);
1258     }
1259     }
1260     }
1261    
1262     if (m_faceOffset[2] > -1) {
1263     const double size=min(xSize,zSize);
1264     #pragma omp for nowait
1265     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1266     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1267     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1268     fill(o, o+numQuad, size);
1269     }
1270     }
1271     }
1272    
1273     if (m_faceOffset[3] > -1) {
1274     const double size=min(xSize,zSize);
1275     #pragma omp for nowait
1276     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1277     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1278     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1279     fill(o, o+numQuad, size);
1280     }
1281     }
1282     }
1283    
1284     if (m_faceOffset[4] > -1) {
1285     const double size=min(xSize,ySize);
1286     #pragma omp for nowait
1287     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1288     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1289     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1290     fill(o, o+numQuad, size);
1291     }
1292     }
1293     }
1294    
1295     if (m_faceOffset[5] > -1) {
1296     const double size=min(xSize,ySize);
1297     #pragma omp for nowait
1298     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1299     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1300     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1301     fill(o, o+numQuad, size);
1302     }
1303     }
1304     }
1305     } // end of parallel section
1306    
1307     } else {
1308     stringstream msg;
1309     msg << "setToSize() not implemented for "
1310     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1311     throw RipleyException(msg.str());
1312     }
1313     }
1314    
1315 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1316     bool reducedColOrder) const
1317     {
1318     if (reducedRowOrder || reducedColOrder)
1319     throw RipleyException("getPattern() not implemented for reduced order");
1320    
1321 caltinay 3756 return m_pattern;
1322     }
1323    
1324     void Brick::Print_Mesh_Info(const bool full) const
1325     {
1326     RipleyDomain::Print_Mesh_Info(full);
1327     if (full) {
1328     cout << " Id Coordinates" << endl;
1329     cout.precision(15);
1330     cout.setf(ios::scientific, ios::floatfield);
1331     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1332     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1333     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1334     for (index_t i=0; i < getNumNodes(); i++) {
1335     cout << " " << setw(5) << m_nodeId[i]
1336     << " " << xdx.first+(i%m_N0)*xdx.second
1337     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1338     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1339     }
1340     }
1341     }
1342    
1343     IndexVector Brick::getNumNodesPerDim() const
1344     {
1345     IndexVector ret;
1346     ret.push_back(m_N0);
1347     ret.push_back(m_N1);
1348     ret.push_back(m_N2);
1349     return ret;
1350     }
1351    
1352     IndexVector Brick::getNumElementsPerDim() const
1353     {
1354     IndexVector ret;
1355     ret.push_back(m_NE0);
1356     ret.push_back(m_NE1);
1357     ret.push_back(m_NE2);
1358     return ret;
1359     }
1360    
1361     IndexVector Brick::getNumFacesPerBoundary() const
1362     {
1363     IndexVector ret(6, 0);
1364     //left
1365     if (m_offset0==0)
1366     ret[0]=m_NE1*m_NE2;
1367     //right
1368     if (m_mpiInfo->rank%m_NX==m_NX-1)
1369     ret[1]=m_NE1*m_NE2;
1370     //bottom
1371     if (m_offset1==0)
1372     ret[2]=m_NE0*m_NE2;
1373     //top
1374     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1375     ret[3]=m_NE0*m_NE2;
1376     //front
1377     if (m_offset2==0)
1378     ret[4]=m_NE0*m_NE1;
1379     //back
1380     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1381     ret[5]=m_NE0*m_NE1;
1382     return ret;
1383     }
1384    
1385     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1386     {
1387     if (dim==0)
1388     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1389     else if (dim==1)
1390     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1391     else if (dim==2)
1392     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1393    
1394     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1395     }
1396    
1397     //protected
1398     dim_t Brick::getNumDOF() const
1399     {
1400     return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1401     }
1402    
1403     //protected
1404     dim_t Brick::getNumFaceElements() const
1405     {
1406     const IndexVector faces = getNumFacesPerBoundary();
1407     dim_t n=0;
1408     for (size_t i=0; i<faces.size(); i++)
1409     n+=faces[i];
1410     return n;
1411     }
1412    
1413     //protected
1414     void Brick::assembleCoordinates(escript::Data& arg) const
1415     {
1416     escriptDataC x = arg.getDataC();
1417     int numDim = m_numDim;
1418     if (!isDataPointShapeEqual(&x, 1, &numDim))
1419     throw RipleyException("setToX: Invalid Data object shape");
1420     if (!numSamplesEqual(&x, 1, getNumNodes()))
1421     throw RipleyException("setToX: Illegal number of samples in Data object");
1422    
1423     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1424     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1425     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1426     arg.requireWrite();
1427     #pragma omp parallel for
1428     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1429     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1430     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1431     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1432     point[0] = xdx.first+i0*xdx.second;
1433     point[1] = ydy.first+i1*ydy.second;
1434     point[2] = zdz.first+i2*zdz.second;
1435     }
1436     }
1437     }
1438     }
1439    
1440     //protected
1441     dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1442     {
1443     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1444     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1445     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1446     const int x=node%nDOF0;
1447     const int y=node%(nDOF0*nDOF1)/nDOF0;
1448     const int z=node/(nDOF0*nDOF1);
1449     int num=0;
1450     // loop through potential neighbours and add to index if positions are
1451     // within bounds
1452     for (int i2=-1; i2<2; i2++) {
1453     for (int i1=-1; i1<2; i1++) {
1454     for (int i0=-1; i0<2; i0++) {
1455     // skip node itself
1456     if (i0==0 && i1==0 && i2==0)
1457     continue;
1458     // location of neighbour node
1459     const int nx=x+i0;
1460     const int ny=y+i1;
1461     const int nz=z+i2;
1462     if (nx>=0 && ny>=0 && nz>=0
1463     && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1464     index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1465     num++;
1466     }
1467     }
1468     }
1469     }
1470    
1471     return num;
1472     }
1473    
1474     //protected
1475     void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1476     {
1477     const dim_t numComp = in.getDataPointSize();
1478     out.requireWrite();
1479    
1480 caltinay 3754 const index_t left = (m_offset0==0 ? 0 : 1);
1481     const index_t bottom = (m_offset1==0 ? 0 : 1);
1482     const index_t front = (m_offset2==0 ? 0 : 1);
1483 caltinay 3756 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1484     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1485     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1486     #pragma omp parallel for
1487     for (index_t i=0; i<nDOF2; i++) {
1488     for (index_t j=0; j<nDOF1; j++) {
1489     for (index_t k=0; k<nDOF0; k++) {
1490     const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1491     const double* src=in.getSampleDataRO(n);
1492     copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1493     }
1494     }
1495     }
1496     }
1497 caltinay 3754
1498 caltinay 3756 //protected
1499     void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1500     {
1501     const dim_t numComp = in.getDataPointSize();
1502     Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1503     in.requireWrite();
1504     Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1505    
1506     const dim_t numDOF = getNumDOF();
1507     out.requireWrite();
1508     const double* buffer = Paso_Coupler_finishCollect(coupler);
1509    
1510     #pragma omp parallel for
1511     for (index_t i=0; i<getNumNodes(); i++) {
1512     const double* src=(m_dofMap[i]<numDOF ?
1513     in.getSampleDataRO(m_dofMap[i])
1514     : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1515     copy(src, src+numComp, out.getSampleDataRW(i));
1516     }
1517     }
1518    
1519     //private
1520     void Brick::populateSampleIds()
1521     {
1522     // identifiers are ordered from left to right, bottom to top, front to back
1523     // globally
1524    
1525     // build node distribution vector first.
1526     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1527     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1528     const dim_t numDOF=getNumDOF();
1529     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1530     m_nodeDistribution[k]=k*numDOF;
1531     }
1532     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1533     m_nodeId.resize(getNumNodes());
1534     m_dofId.resize(numDOF);
1535     m_elementId.resize(getNumElements());
1536     m_faceId.resize(getNumFaceElements());
1537    
1538     #pragma omp parallel
1539     {
1540     #pragma omp for nowait
1541     // nodes
1542     for (dim_t i2=0; i2<m_N2; i2++) {
1543     for (dim_t i1=0; i1<m_N1; i1++) {
1544     for (dim_t i0=0; i0<m_N0; i0++) {
1545     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1546     (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1547     +(m_offset1+i1)*(m_gNE0+1)
1548     +m_offset0+i0;
1549     }
1550     }
1551     }
1552    
1553     // degrees of freedom
1554     #pragma omp for nowait
1555     for (dim_t k=0; k<numDOF; k++)
1556     m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1557    
1558     // elements
1559     #pragma omp for nowait
1560     for (dim_t i2=0; i2<m_NE2; i2++) {
1561     for (dim_t i1=0; i1<m_NE1; i1++) {
1562     for (dim_t i0=0; i0<m_NE0; i0++) {
1563     m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1564     (m_offset2+i2)*m_gNE0*m_gNE1
1565     +(m_offset1+i1)*m_gNE0
1566     +m_offset0+i0;
1567     }
1568     }
1569     }
1570    
1571     // face elements
1572     #pragma omp for
1573     for (dim_t k=0; k<getNumFaceElements(); k++)
1574     m_faceId[k]=k;
1575     } // end parallel section
1576    
1577     m_nodeTags.assign(getNumNodes(), 0);
1578     updateTagsInUse(Nodes);
1579    
1580     m_elementTags.assign(getNumElements(), 0);
1581     updateTagsInUse(Elements);
1582    
1583     // generate face offset vector and set face tags
1584     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1585     const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1586     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1587     m_faceOffset.assign(facesPerEdge.size(), -1);
1588     m_faceTags.clear();
1589     index_t offset=0;
1590     for (size_t i=0; i<facesPerEdge.size(); i++) {
1591     if (facesPerEdge[i]>0) {
1592     m_faceOffset[i]=offset;
1593     offset+=facesPerEdge[i];
1594     m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1595     }
1596     }
1597     setTagMap("left", LEFT);
1598     setTagMap("right", RIGHT);
1599     setTagMap("bottom", BOTTOM);
1600     setTagMap("top", TOP);
1601     setTagMap("front", FRONT);
1602     setTagMap("back", BACK);
1603     updateTagsInUse(FaceElements);
1604     }
1605    
1606     //private
1607     void Brick::createPattern()
1608     {
1609     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1610     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1611     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1612     const index_t left = (m_offset0==0 ? 0 : 1);
1613     const index_t bottom = (m_offset1==0 ? 0 : 1);
1614     const index_t front = (m_offset2==0 ? 0 : 1);
1615    
1616     // populate node->DOF mapping with own degrees of freedom.
1617     // The rest is assigned in the loop further down