/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3757 - (hide annotations)
Fri Jan 6 04:48:27 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 378071 byte(s)
Implemented ownSample() for DOF and Elements.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16 caltinay 3754 #include "paso/SystemMatrix.h"
17 caltinay 3691 }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50 caltinay 3753 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51     throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52 caltinay 3691
53 caltinay 3753 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54     throw RipleyException("Too few elements for the number of ranks");
55    
56     // local number of elements (including overlap)
57     m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58     if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59     m_NE0++;
60     m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62     m_NE1++;
63     m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64     if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65     m_NE2++;
66    
67     // local number of nodes
68 caltinay 3691 m_N0 = m_NE0+1;
69     m_N1 = m_NE1+1;
70     m_N2 = m_NE2+1;
71 caltinay 3753
72 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 caltinay 3753 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74     if (m_offset0 > 0)
75     m_offset0--;
76     m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77     if (m_offset1 > 0)
78     m_offset1--;
79     m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80     if (m_offset2 > 0)
81     m_offset2--;
82    
83 caltinay 3691 populateSampleIds();
84 caltinay 3756 createPattern();
85 caltinay 3691 }
86    
87    
88     Brick::~Brick()
89     {
90     }
91    
92     string Brick::getDescription() const
93     {
94     return "ripley::Brick";
95     }
96    
97     bool Brick::operator==(const AbstractDomain& other) const
98     {
99 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
100     if (o) {
101     return (RipleyDomain::operator==(other) &&
102     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103     && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104     && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105     }
106 caltinay 3691
107     return false;
108     }
109    
110     void Brick::dump(const string& fileName) const
111     {
112     #if USE_SILO
113     string fn(fileName);
114     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115     fn+=".silo";
116     }
117    
118     const int NUM_SILO_FILES = 1;
119     const char* blockDirFmt = "/block%04d";
120     int driver=DB_HDF5;
121     string siloPath;
122     DBfile* dbfile = NULL;
123    
124     #ifdef ESYS_MPI
125     PMPIO_baton_t* baton = NULL;
126     #endif
127    
128     if (m_mpiInfo->size > 1) {
129     #ifdef ESYS_MPI
130     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132     PMPIO_DefaultClose, (void*)&driver);
133     // try the fallback driver in case of error
134     if (!baton && driver != DB_PDB) {
135     driver = DB_PDB;
136     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138     PMPIO_DefaultClose, (void*)&driver);
139     }
140     if (baton) {
141     char str[64];
142     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143     siloPath = str;
144     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145     }
146     #endif
147     } else {
148     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149     getDescription().c_str(), driver);
150     // try the fallback driver in case of error
151     if (!dbfile && driver != DB_PDB) {
152     driver = DB_PDB;
153     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154     getDescription().c_str(), driver);
155     }
156     }
157    
158     if (!dbfile)
159     throw RipleyException("dump: Could not create Silo file");
160    
161     /*
162     if (driver==DB_HDF5) {
163     // gzip level 1 already provides good compression with minimal
164     // performance penalty. Some tests showed that gzip levels >3 performed
165     // rather badly on escript data both in terms of time and space
166     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167     }
168     */
169    
170     boost::scoped_ptr<double> x(new double[m_N0]);
171     boost::scoped_ptr<double> y(new double[m_N1]);
172     boost::scoped_ptr<double> z(new double[m_N2]);
173     double* coords[3] = { x.get(), y.get(), z.get() };
174 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175     pair<double,double> ydy = getFirstCoordAndSpacing(1);
176     pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 caltinay 3691 #pragma omp parallel
178     {
179     #pragma omp for
180     for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
182 caltinay 3691 }
183     #pragma omp for
184     for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
186 caltinay 3691 }
187     #pragma omp for
188     for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
190 caltinay 3691 }
191     }
192 caltinay 3698 IndexVector dims = getNumNodesPerDim();
193     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 caltinay 3691 DB_COLLINEAR, NULL);
195    
196 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197     NULL, 0, DB_INT, DB_NODECENT, NULL);
198 caltinay 3691
199 caltinay 3698 // write element ids
200     dims = getNumElementsPerDim();
201     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203    
204     // rank 0 writes multimesh and multivar
205 caltinay 3691 if (m_mpiInfo->rank == 0) {
206     vector<string> tempstrings;
207     vector<char*> names;
208     for (dim_t i=0; i<m_mpiInfo->size; i++) {
209     stringstream path;
210     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211     tempstrings.push_back(path.str());
212     names.push_back((char*)tempstrings.back().c_str());
213     }
214     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215     DBSetDir(dbfile, "/");
216     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217     &types[0], NULL);
218     tempstrings.clear();
219     names.clear();
220     for (dim_t i=0; i<m_mpiInfo->size; i++) {
221     stringstream path;
222     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223     tempstrings.push_back(path.str());
224     names.push_back((char*)tempstrings.back().c_str());
225     }
226     types.assign(m_mpiInfo->size, DB_QUADVAR);
227     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228     &types[0], NULL);
229 caltinay 3698 tempstrings.clear();
230     names.clear();
231     for (dim_t i=0; i<m_mpiInfo->size; i++) {
232     stringstream path;
233     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234     tempstrings.push_back(path.str());
235     names.push_back((char*)tempstrings.back().c_str());
236     }
237     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238     &types[0], NULL);
239 caltinay 3691 }
240    
241     if (m_mpiInfo->size > 1) {
242     #ifdef ESYS_MPI
243     PMPIO_HandOffBaton(baton, dbfile);
244     PMPIO_Finish(baton);
245     #endif
246     } else {
247     DBClose(dbfile);
248     }
249    
250     #else // USE_SILO
251     throw RipleyException("dump(): no Silo support");
252     #endif
253     }
254    
255     const int* Brick::borrowSampleReferenceIDs(int fsType) const
256     {
257 caltinay 3697 switch (fsType) {
258     case Nodes:
259 caltinay 3748 case ReducedNodes: //FIXME: reduced
260 caltinay 3753 return &m_nodeId[0];
261 caltinay 3757 case DegreesOfFreedom:
262     case ReducedDegreesOfFreedom: //FIXME: reduced
263 caltinay 3753 return &m_dofId[0];
264 caltinay 3697 case Elements:
265 caltinay 3733 case ReducedElements:
266 caltinay 3697 return &m_elementId[0];
267 caltinay 3757 case FaceElements:
268 caltinay 3733 case ReducedFaceElements:
269 caltinay 3697 return &m_faceId[0];
270     default:
271     break;
272     }
273 caltinay 3691
274 caltinay 3697 stringstream msg;
275     msg << "borrowSampleReferenceIDs() not implemented for function space type "
276     << fsType;
277     throw RipleyException(msg.str());
278 caltinay 3691 }
279    
280 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
281 caltinay 3691 {
282     #ifdef ESYS_MPI
283 caltinay 3757 switch (fsType) {
284     case Nodes:
285     case ReducedNodes: //FIXME: reduced
286     return (m_dofMap[id] < getNumDOF());
287     case DegreesOfFreedom:
288     case ReducedDegreesOfFreedom:
289     return true;
290     case Elements:
291     case ReducedElements:
292     {
293     // check ownership of element's _last_ node
294     const index_t x=id%m_NE0 + 1;
295     const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
296     const index_t z=id/(m_NE0*m_NE1) + 1;
297     return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
298     }
299     case FaceElements:
300     case ReducedFaceElements:
301     default:
302     break;
303 caltinay 3753 }
304 caltinay 3757
305     stringstream msg;
306     msg << "ownSample() not implemented for "
307     << functionSpaceTypeAsString(fsType);
308     throw RipleyException(msg.str());
309 caltinay 3691 #else
310     return true;
311     #endif
312     }
313    
314 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
315     {
316     escript::Data& in = *const_cast<escript::Data*>(&cIn);
317     const dim_t numComp = in.getDataPointSize();
318     const double h0 = m_l0/m_gNE0;
319     const double h1 = m_l1/m_gNE1;
320     const double h2 = m_l1/m_gNE2;
321 caltinay 3731 const double C0 = .044658198738520451079;
322     const double C1 = .16666666666666666667;
323     const double C2 = .21132486540518711775;
324     const double C3 = .25;
325     const double C4 = .5;
326     const double C5 = .62200846792814621559;
327     const double C6 = .78867513459481288225;
328    
329 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
330 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
331 caltinay 3703 #pragma omp parallel for
332 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
333     for (index_t k1=0; k1 < m_NE1; ++k1) {
334     for (index_t k0=0; k0 < m_NE0; ++k0) {
335 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
336     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
337     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
338     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
339     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
340     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
341     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
342     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
343     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
344     for (index_t i=0; i < numComp; ++i) {
345 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
346     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
347     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
348     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
349     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
350     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
351     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
352     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
353     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
354     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
355     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
356     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
357     o[INDEX3(i,0,0,numComp,3)] = V0;
358     o[INDEX3(i,1,0,numComp,3)] = V4;
359     o[INDEX3(i,2,0,numComp,3)] = V8;
360     o[INDEX3(i,0,1,numComp,3)] = V0;
361     o[INDEX3(i,1,1,numComp,3)] = V5;
362     o[INDEX3(i,2,1,numComp,3)] = V9;
363     o[INDEX3(i,0,2,numComp,3)] = V1;
364     o[INDEX3(i,1,2,numComp,3)] = V4;
365     o[INDEX3(i,2,2,numComp,3)] = V10;
366     o[INDEX3(i,0,3,numComp,3)] = V1;
367     o[INDEX3(i,1,3,numComp,3)] = V5;
368     o[INDEX3(i,2,3,numComp,3)] = V11;
369     o[INDEX3(i,0,4,numComp,3)] = V2;
370     o[INDEX3(i,1,4,numComp,3)] = V6;
371     o[INDEX3(i,2,4,numComp,3)] = V8;
372     o[INDEX3(i,0,5,numComp,3)] = V2;
373     o[INDEX3(i,1,5,numComp,3)] = V7;
374     o[INDEX3(i,2,5,numComp,3)] = V9;
375     o[INDEX3(i,0,6,numComp,3)] = V3;
376     o[INDEX3(i,1,6,numComp,3)] = V6;
377     o[INDEX3(i,2,6,numComp,3)] = V10;
378     o[INDEX3(i,0,7,numComp,3)] = V3;
379     o[INDEX3(i,1,7,numComp,3)] = V7;
380     o[INDEX3(i,2,7,numComp,3)] = V11;
381 caltinay 3703 } /* end of component loop i */
382     } /* end of k0 loop */
383     } /* end of k1 loop */
384     } /* end of k2 loop */
385     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
386 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
387 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
388 caltinay 3711 #pragma omp parallel for
389     for (index_t k2=0; k2 < m_NE2; ++k2) {
390     for (index_t k1=0; k1 < m_NE1; ++k1) {
391     for (index_t k0=0; k0 < m_NE0; ++k0) {
392     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
393     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
394     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
395     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
396     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
397     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
398     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
399     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
400     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
401     for (index_t i=0; i < numComp; ++i) {
402 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
403     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
404     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
405 caltinay 3711 } /* end of component loop i */
406     } /* end of k0 loop */
407     } /* end of k1 loop */
408     } /* end of k2 loop */
409     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
410 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
411 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
412 caltinay 3722 #pragma omp parallel
413     {
414     if (m_faceOffset[0] > -1) {
415     #pragma omp for nowait
416     for (index_t k2=0; k2 < m_NE2; ++k2) {
417     for (index_t k1=0; k1 < m_NE1; ++k1) {
418     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
419     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
420     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
421     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
422     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
423     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
424     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
425     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
426     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
427     for (index_t i=0; i < numComp; ++i) {
428 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
429     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
430     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
431     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
432     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
433     o[INDEX3(i,1,0,numComp,3)] = V0;
434     o[INDEX3(i,2,0,numComp,3)] = V2;
435     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
436     o[INDEX3(i,1,1,numComp,3)] = V0;
437     o[INDEX3(i,2,1,numComp,3)] = V3;
438     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
439     o[INDEX3(i,1,2,numComp,3)] = V1;
440     o[INDEX3(i,2,2,numComp,3)] = V2;
441     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
442     o[INDEX3(i,1,3,numComp,3)] = V1;
443     o[INDEX3(i,2,3,numComp,3)] = V3;
444 caltinay 3722 } /* end of component loop i */
445     } /* end of k1 loop */
446     } /* end of k2 loop */
447     } /* end of face 0 */
448     if (m_faceOffset[1] > -1) {
449     #pragma omp for nowait
450     for (index_t k2=0; k2 < m_NE2; ++k2) {
451     for (index_t k1=0; k1 < m_NE1; ++k1) {
452     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
453     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
454     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
455     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
456     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
457     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
458     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
459     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
460     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
461     for (index_t i=0; i < numComp; ++i) {
462 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
463     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
464     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
465     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
466     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
467     o[INDEX3(i,1,0,numComp,3)] = V0;
468     o[INDEX3(i,2,0,numComp,3)] = V2;
469     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
470     o[INDEX3(i,1,1,numComp,3)] = V0;
471     o[INDEX3(i,2,1,numComp,3)] = V3;
472     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
473     o[INDEX3(i,1,2,numComp,3)] = V1;
474     o[INDEX3(i,2,2,numComp,3)] = V2;
475     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
476     o[INDEX3(i,1,3,numComp,3)] = V1;
477     o[INDEX3(i,2,3,numComp,3)] = V3;
478 caltinay 3722 } /* end of component loop i */
479     } /* end of k1 loop */
480     } /* end of k2 loop */
481     } /* end of face 1 */
482     if (m_faceOffset[2] > -1) {
483     #pragma omp for nowait
484     for (index_t k2=0; k2 < m_NE2; ++k2) {
485     for (index_t k0=0; k0 < m_NE0; ++k0) {
486     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
487     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
488     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
489     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
490     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
491     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
492     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
493     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
494     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
495     for (index_t i=0; i < numComp; ++i) {
496 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
497     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
498     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
499     o[INDEX3(i,0,0,numComp,3)] = V0;
500     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
501     o[INDEX3(i,2,0,numComp,3)] = V1;
502     o[INDEX3(i,0,1,numComp,3)] = V0;
503     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
504     o[INDEX3(i,2,1,numComp,3)] = V2;
505     o[INDEX3(i,0,2,numComp,3)] = V0;
506     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
507     o[INDEX3(i,2,2,numComp,3)] = V1;
508     o[INDEX3(i,0,3,numComp,3)] = V0;
509     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
510     o[INDEX3(i,2,3,numComp,3)] = V2;
511 caltinay 3722 } /* end of component loop i */
512     } /* end of k0 loop */
513     } /* end of k2 loop */
514     } /* end of face 2 */
515     if (m_faceOffset[3] > -1) {
516     #pragma omp for nowait
517     for (index_t k2=0; k2 < m_NE2; ++k2) {
518     for (index_t k0=0; k0 < m_NE0; ++k0) {
519     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
520     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
521     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
522     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
523     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
524     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
525     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
526     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
527     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
528     for (index_t i=0; i < numComp; ++i) {
529 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
530     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
531     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
532     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
533     o[INDEX3(i,0,0,numComp,3)] = V0;
534     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
535     o[INDEX3(i,2,0,numComp,3)] = V2;
536     o[INDEX3(i,0,1,numComp,3)] = V0;
537     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
538     o[INDEX3(i,2,1,numComp,3)] = V3;
539     o[INDEX3(i,0,2,numComp,3)] = V1;
540     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
541     o[INDEX3(i,2,2,numComp,3)] = V2;
542     o[INDEX3(i,0,3,numComp,3)] = V1;
543     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
544     o[INDEX3(i,2,3,numComp,3)] = V3;
545 caltinay 3722 } /* end of component loop i */
546     } /* end of k0 loop */
547     } /* end of k2 loop */
548     } /* end of face 3 */
549     if (m_faceOffset[4] > -1) {
550     #pragma omp for nowait
551 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
552 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
553     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
554     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
555     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
556     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
557     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
558     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
559     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
560     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
561     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
562     for (index_t i=0; i < numComp; ++i) {
563 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
564     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
565     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
566     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
567     o[INDEX3(i,0,0,numComp,3)] = V0;
568     o[INDEX3(i,1,0,numComp,3)] = V2;
569     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
570     o[INDEX3(i,0,1,numComp,3)] = V0;
571     o[INDEX3(i,1,1,numComp,3)] = V3;
572     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
573     o[INDEX3(i,0,2,numComp,3)] = V1;
574     o[INDEX3(i,1,2,numComp,3)] = V2;
575     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
576     o[INDEX3(i,0,3,numComp,3)] = V1;
577     o[INDEX3(i,1,3,numComp,3)] = V3;
578     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
579 caltinay 3722 } /* end of component loop i */
580     } /* end of k0 loop */
581 caltinay 3707 } /* end of k1 loop */
582 caltinay 3722 } /* end of face 4 */
583     if (m_faceOffset[5] > -1) {
584     #pragma omp for nowait
585 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
586 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
587     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
588     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
589     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
590     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
591     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
592     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
593     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
594     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
595     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
596     for (index_t i=0; i < numComp; ++i) {
597 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
598     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
599     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
600     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
601     o[INDEX3(i,0,0,numComp,3)] = V0;
602     o[INDEX3(i,1,0,numComp,3)] = V2;
603     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
604     o[INDEX3(i,0,1,numComp,3)] = V0;
605     o[INDEX3(i,1,1,numComp,3)] = V3;
606     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
607     o[INDEX3(i,0,2,numComp,3)] = V1;
608     o[INDEX3(i,1,2,numComp,3)] = V2;
609     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
610     o[INDEX3(i,0,3,numComp,3)] = V1;
611     o[INDEX3(i,1,3,numComp,3)] = V3;
612     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
613 caltinay 3722 } /* end of component loop i */
614     } /* end of k0 loop */
615 caltinay 3707 } /* end of k1 loop */
616 caltinay 3722 } /* end of face 5 */
617     } // end of parallel section
618 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
619 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
620 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
621 caltinay 3722 #pragma omp parallel
622     {
623     if (m_faceOffset[0] > -1) {
624     #pragma omp for nowait
625     for (index_t k2=0; k2 < m_NE2; ++k2) {
626     for (index_t k1=0; k1 < m_NE1; ++k1) {
627     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
628     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
629     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
630     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
631     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
632     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
633     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
634     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
635     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
636     for (index_t i=0; i < numComp; ++i) {
637 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
638     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
639     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
640 caltinay 3722 } /* end of component loop i */
641     } /* end of k1 loop */
642     } /* end of k2 loop */
643     } /* end of face 0 */
644     if (m_faceOffset[1] > -1) {
645     #pragma omp for nowait
646     for (index_t k2=0; k2 < m_NE2; ++k2) {
647     for (index_t k1=0; k1 < m_NE1; ++k1) {
648     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
649     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
650     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
651     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
652     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
653     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
654     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
655     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
656     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
657     for (index_t i=0; i < numComp; ++i) {
658 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
659     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
660     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
661 caltinay 3722 } /* end of component loop i */
662     } /* end of k1 loop */
663     } /* end of k2 loop */
664     } /* end of face 1 */
665     if (m_faceOffset[2] > -1) {
666     #pragma omp for nowait
667     for (index_t k2=0; k2 < m_NE2; ++k2) {
668     for (index_t k0=0; k0 < m_NE0; ++k0) {
669     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
670     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
671     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
672     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
673     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
674     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
675     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
676     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
677     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
678     for (index_t i=0; i < numComp; ++i) {
679 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
680     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
681     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
682 caltinay 3722 } /* end of component loop i */
683     } /* end of k0 loop */
684     } /* end of k2 loop */
685     } /* end of face 2 */
686     if (m_faceOffset[3] > -1) {
687     #pragma omp for nowait
688     for (index_t k2=0; k2 < m_NE2; ++k2) {
689     for (index_t k0=0; k0 < m_NE0; ++k0) {
690     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
691     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
692     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
693     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
694     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
695     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
696     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
697     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
698     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
699     for (index_t i=0; i < numComp; ++i) {
700 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
701     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
702     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
703 caltinay 3722 } /* end of component loop i */
704     } /* end of k0 loop */
705     } /* end of k2 loop */
706     } /* end of face 3 */
707     if (m_faceOffset[4] > -1) {
708     #pragma omp for nowait
709 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
710 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
711     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
712     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
713     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
714     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
715     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
716     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
717     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
718     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
719     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
720     for (index_t i=0; i < numComp; ++i) {
721 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
722     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
723     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
724 caltinay 3722 } /* end of component loop i */
725     } /* end of k0 loop */
726 caltinay 3711 } /* end of k1 loop */
727 caltinay 3722 } /* end of face 4 */
728     if (m_faceOffset[5] > -1) {
729     #pragma omp for nowait
730 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
731 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
732     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
733     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
734     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
735     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
736     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
737     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
738     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
739     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
740     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
741     for (index_t i=0; i < numComp; ++i) {
742 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
743     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
744     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
745 caltinay 3722 } /* end of component loop i */
746     } /* end of k0 loop */
747 caltinay 3711 } /* end of k1 loop */
748 caltinay 3722 } /* end of face 5 */
749     } // end of parallel section
750 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
751 caltinay 3703 } else {
752 caltinay 3707 stringstream msg;
753     msg << "setToGradient() not implemented for "
754     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
755     throw RipleyException(msg.str());
756 caltinay 3703 }
757     }
758    
759 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
760     {
761     escript::Data& in = *const_cast<escript::Data*>(&arg);
762     const dim_t numComp = in.getDataPointSize();
763     const double h0 = m_l0/m_gNE0;
764     const double h1 = m_l1/m_gNE1;
765     const double h2 = m_l2/m_gNE2;
766     if (arg.getFunctionSpace().getTypeCode() == Elements) {
767     const double w_0 = h0*h1*h2/8.;
768     #pragma omp parallel
769     {
770     vector<double> int_local(numComp, 0);
771 caltinay 3722 #pragma omp for nowait
772 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
773     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
774     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
775     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
776     for (index_t i=0; i < numComp; ++i) {
777     const register double f_0 = f[INDEX2(i,0,numComp)];
778     const register double f_1 = f[INDEX2(i,1,numComp)];
779     const register double f_2 = f[INDEX2(i,2,numComp)];
780     const register double f_3 = f[INDEX2(i,3,numComp)];
781     const register double f_4 = f[INDEX2(i,4,numComp)];
782     const register double f_5 = f[INDEX2(i,5,numComp)];
783     const register double f_6 = f[INDEX2(i,6,numComp)];
784     const register double f_7 = f[INDEX2(i,7,numComp)];
785     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
786     } /* end of component loop i */
787     } /* end of k0 loop */
788     } /* end of k1 loop */
789     } /* end of k2 loop */
790    
791     #pragma omp critical
792     for (index_t i=0; i<numComp; i++)
793     integrals[i]+=int_local[i];
794 caltinay 3722 } // end of parallel section
795 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
796     const double w_0 = h0*h1*h2;
797     #pragma omp parallel
798     {
799     vector<double> int_local(numComp, 0);
800 caltinay 3722 #pragma omp for nowait
801 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
802     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
803     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
804     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
805     for (index_t i=0; i < numComp; ++i) {
806     int_local[i]+=f[i]*w_0;
807     } /* end of component loop i */
808     } /* end of k0 loop */
809     } /* end of k1 loop */
810     } /* end of k2 loop */
811    
812     #pragma omp critical
813     for (index_t i=0; i<numComp; i++)
814     integrals[i]+=int_local[i];
815 caltinay 3722 } // end of parallel section
816 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
817     const double w_0 = h1*h2/4.;
818     const double w_1 = h0*h2/4.;
819     const double w_2 = h0*h1/4.;
820     #pragma omp parallel
821     {
822     vector<double> int_local(numComp, 0);
823     if (m_faceOffset[0] > -1) {
824 caltinay 3722 #pragma omp for nowait
825 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
826     for (index_t k1=0; k1 < m_NE1; ++k1) {
827     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
828     for (index_t i=0; i < numComp; ++i) {
829     const register double f_0 = f[INDEX2(i,0,numComp)];
830     const register double f_1 = f[INDEX2(i,1,numComp)];
831     const register double f_2 = f[INDEX2(i,2,numComp)];
832     const register double f_3 = f[INDEX2(i,3,numComp)];
833     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
834     } /* end of component loop i */
835     } /* end of k1 loop */
836     } /* end of k2 loop */
837     }
838    
839     if (m_faceOffset[1] > -1) {
840 caltinay 3722 #pragma omp for nowait
841 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
842     for (index_t k1=0; k1 < m_NE1; ++k1) {
843     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
844     for (index_t i=0; i < numComp; ++i) {
845     const register double f_0 = f[INDEX2(i,0,numComp)];
846     const register double f_1 = f[INDEX2(i,1,numComp)];
847     const register double f_2 = f[INDEX2(i,2,numComp)];
848     const register double f_3 = f[INDEX2(i,3,numComp)];
849     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
850     } /* end of component loop i */
851     } /* end of k1 loop */
852     } /* end of k2 loop */
853     }
854    
855     if (m_faceOffset[2] > -1) {
856 caltinay 3722 #pragma omp for nowait
857 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
858     for (index_t k0=0; k0 < m_NE0; ++k0) {
859     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
860     for (index_t i=0; i < numComp; ++i) {
861     const register double f_0 = f[INDEX2(i,0,numComp)];
862     const register double f_1 = f[INDEX2(i,1,numComp)];
863     const register double f_2 = f[INDEX2(i,2,numComp)];
864     const register double f_3 = f[INDEX2(i,3,numComp)];
865     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
866     } /* end of component loop i */
867     } /* end of k1 loop */
868     } /* end of k2 loop */
869     }
870    
871     if (m_faceOffset[3] > -1) {
872 caltinay 3722 #pragma omp for nowait
873 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
874     for (index_t k0=0; k0 < m_NE0; ++k0) {
875     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
876     for (index_t i=0; i < numComp; ++i) {
877     const register double f_0 = f[INDEX2(i,0,numComp)];
878     const register double f_1 = f[INDEX2(i,1,numComp)];
879     const register double f_2 = f[INDEX2(i,2,numComp)];
880     const register double f_3 = f[INDEX2(i,3,numComp)];
881     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
882     } /* end of component loop i */
883     } /* end of k1 loop */
884     } /* end of k2 loop */
885     }
886    
887     if (m_faceOffset[4] > -1) {
888 caltinay 3722 #pragma omp for nowait
889 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
890     for (index_t k0=0; k0 < m_NE0; ++k0) {
891     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
892     for (index_t i=0; i < numComp; ++i) {
893     const register double f_0 = f[INDEX2(i,0,numComp)];
894     const register double f_1 = f[INDEX2(i,1,numComp)];
895     const register double f_2 = f[INDEX2(i,2,numComp)];
896     const register double f_3 = f[INDEX2(i,3,numComp)];
897     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
898     } /* end of component loop i */
899     } /* end of k1 loop */
900     } /* end of k2 loop */
901     }
902    
903     if (m_faceOffset[5] > -1) {
904 caltinay 3722 #pragma omp for nowait
905 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
906     for (index_t k0=0; k0 < m_NE0; ++k0) {
907     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
908     for (index_t i=0; i < numComp; ++i) {
909     const register double f_0 = f[INDEX2(i,0,numComp)];
910     const register double f_1 = f[INDEX2(i,1,numComp)];
911     const register double f_2 = f[INDEX2(i,2,numComp)];
912     const register double f_3 = f[INDEX2(i,3,numComp)];
913     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
914     } /* end of component loop i */
915     } /* end of k1 loop */
916     } /* end of k2 loop */
917     }
918    
919     #pragma omp critical
920     for (index_t i=0; i<numComp; i++)
921     integrals[i]+=int_local[i];
922 caltinay 3722 } // end of parallel section
923 caltinay 3713
924     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
925     const double w_0 = h1*h2;
926     const double w_1 = h0*h2;
927     const double w_2 = h0*h1;
928     #pragma omp parallel
929     {
930     vector<double> int_local(numComp, 0);
931     if (m_faceOffset[0] > -1) {
932 caltinay 3722 #pragma omp for nowait
933 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
934     for (index_t k1=0; k1 < m_NE1; ++k1) {
935     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
936     for (index_t i=0; i < numComp; ++i) {
937     int_local[i]+=f[i]*w_0;
938     } /* end of component loop i */
939     } /* end of k1 loop */
940     } /* end of k2 loop */
941     }
942    
943     if (m_faceOffset[1] > -1) {
944 caltinay 3722 #pragma omp for nowait
945 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
946     for (index_t k1=0; k1 < m_NE1; ++k1) {
947     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
948     for (index_t i=0; i < numComp; ++i) {
949     int_local[i]+=f[i]*w_0;
950     } /* end of component loop i */
951     } /* end of k1 loop */
952     } /* end of k2 loop */
953     }
954    
955     if (m_faceOffset[2] > -1) {
956 caltinay 3722 #pragma omp for nowait
957 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
958     for (index_t k0=0; k0 < m_NE0; ++k0) {
959     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
960     for (index_t i=0; i < numComp; ++i) {
961     int_local[i]+=f[i]*w_1;
962     } /* end of component loop i */
963     } /* end of k1 loop */
964     } /* end of k2 loop */
965     }
966    
967     if (m_faceOffset[3] > -1) {
968 caltinay 3722 #pragma omp for nowait
969 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
970     for (index_t k0=0; k0 < m_NE0; ++k0) {
971     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
972     for (index_t i=0; i < numComp; ++i) {
973     int_local[i]+=f[i]*w_1;
974     } /* end of component loop i */
975     } /* end of k1 loop */
976     } /* end of k2 loop */
977     }
978    
979     if (m_faceOffset[4] > -1) {
980 caltinay 3722 #pragma omp for nowait
981 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
982     for (index_t k0=0; k0 < m_NE0; ++k0) {
983     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
984     for (index_t i=0; i < numComp; ++i) {
985     int_local[i]+=f[i]*w_2;
986     } /* end of component loop i */
987     } /* end of k1 loop */
988     } /* end of k2 loop */
989     }
990    
991     if (m_faceOffset[5] > -1) {
992 caltinay 3722 #pragma omp for nowait
993 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
994     for (index_t k0=0; k0 < m_NE0; ++k0) {
995     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
996     for (index_t i=0; i < numComp; ++i) {
997     int_local[i]+=f[i]*w_2;
998     } /* end of component loop i */
999     } /* end of k1 loop */
1000     } /* end of k2 loop */
1001     }
1002    
1003     #pragma omp critical
1004     for (index_t i=0; i<numComp; i++)
1005     integrals[i]+=int_local[i];
1006 caltinay 3722 } // end of parallel section
1007 caltinay 3713
1008     } else {
1009     stringstream msg;
1010     msg << "setToIntegrals() not implemented for "
1011     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1012     throw RipleyException(msg.str());
1013     }
1014     }
1015    
1016 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
1017     {
1018     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1019     #pragma omp parallel
1020     {
1021     if (m_faceOffset[0] > -1) {
1022     #pragma omp for nowait
1023     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1024     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1025     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1026     // set vector at four quadrature points
1027     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1028     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1029     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1030     *o++ = -1.; *o++ = 0.; *o = 0.;
1031     }
1032     }
1033     }
1034    
1035     if (m_faceOffset[1] > -1) {
1036     #pragma omp for nowait
1037     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1038     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1039     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1040     // set vector at four quadrature points
1041     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1042     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1043     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1044     *o++ = 1.; *o++ = 0.; *o = 0.;
1045     }
1046     }
1047     }
1048    
1049     if (m_faceOffset[2] > -1) {
1050     #pragma omp for nowait
1051     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1052     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1053     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1054     // set vector at four quadrature points
1055     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1056     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1057     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1058     *o++ = 0.; *o++ = -1.; *o = 0.;
1059     }
1060     }
1061     }
1062    
1063     if (m_faceOffset[3] > -1) {
1064     #pragma omp for nowait
1065     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1066     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1067     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1068     // set vector at four quadrature points
1069     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1070     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1071     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1072     *o++ = 0.; *o++ = 1.; *o = 0.;
1073     }
1074     }
1075     }
1076    
1077     if (m_faceOffset[4] > -1) {
1078     #pragma omp for nowait
1079     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1080     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1081     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1082     // set vector at four quadrature points
1083     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1084     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1085     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1086     *o++ = 0.; *o++ = 0.; *o = -1.;
1087     }
1088     }
1089     }
1090    
1091     if (m_faceOffset[5] > -1) {
1092     #pragma omp for nowait
1093     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1094     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1095     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1096     // set vector at four quadrature points
1097     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1098     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1099     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1100     *o++ = 0.; *o++ = 0.; *o = 1.;
1101     }
1102     }
1103     }
1104     } // end of parallel section
1105     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1106     #pragma omp parallel
1107     {
1108     if (m_faceOffset[0] > -1) {
1109     #pragma omp for nowait
1110     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1111     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1112     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1113     *o++ = -1.;
1114     *o++ = 0.;
1115     *o = 0.;
1116     }
1117     }
1118     }
1119    
1120     if (m_faceOffset[1] > -1) {
1121     #pragma omp for nowait
1122     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1123     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1124     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1125     *o++ = 1.;
1126     *o++ = 0.;
1127     *o = 0.;
1128     }
1129     }
1130     }
1131    
1132     if (m_faceOffset[2] > -1) {
1133     #pragma omp for nowait
1134     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1135     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1136     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1137     *o++ = 0.;
1138     *o++ = -1.;
1139     *o = 0.;
1140     }
1141     }
1142     }
1143    
1144     if (m_faceOffset[3] > -1) {
1145     #pragma omp for nowait
1146     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1147     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1148     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1149     *o++ = 0.;
1150     *o++ = 1.;
1151     *o = 0.;
1152     }
1153     }
1154     }
1155    
1156     if (m_faceOffset[4] > -1) {
1157     #pragma omp for nowait
1158     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1159     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1160     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1161     *o++ = 0.;
1162     *o++ = 0.;
1163     *o = -1.;
1164     }
1165     }
1166     }
1167    
1168     if (m_faceOffset[5] > -1) {
1169     #pragma omp for nowait
1170     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1171     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1172     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1173     *o++ = 0.;
1174     *o++ = 0.;
1175     *o = 1.;
1176     }
1177     }
1178     }
1179     } // end of parallel section
1180    
1181     } else {
1182     stringstream msg;
1183     msg << "setToNormal() not implemented for "
1184     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1185     throw RipleyException(msg.str());
1186     }
1187     }
1188    
1189 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1190     bool reducedColOrder) const
1191     {
1192     if (reducedRowOrder || reducedColOrder)
1193     throw RipleyException("getPattern() not implemented for reduced order");
1194    
1195 caltinay 3756 return m_pattern;
1196     }
1197    
1198     void Brick::Print_Mesh_Info(const bool full) const
1199     {
1200     RipleyDomain::Print_Mesh_Info(full);
1201     if (full) {
1202     cout << " Id Coordinates" << endl;
1203     cout.precision(15);
1204     cout.setf(ios::scientific, ios::floatfield);
1205     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1206     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1207     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1208     for (index_t i=0; i < getNumNodes(); i++) {
1209     cout << " " << setw(5) << m_nodeId[i]
1210     << " " << xdx.first+(i%m_N0)*xdx.second
1211     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1212     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1213     }
1214     }
1215     }
1216    
1217     IndexVector Brick::getNumNodesPerDim() const
1218     {
1219     IndexVector ret;
1220     ret.push_back(m_N0);
1221     ret.push_back(m_N1);
1222     ret.push_back(m_N2);
1223     return ret;
1224     }
1225    
1226     IndexVector Brick::getNumElementsPerDim() const
1227     {
1228     IndexVector ret;
1229     ret.push_back(m_NE0);
1230     ret.push_back(m_NE1);
1231     ret.push_back(m_NE2);
1232     return ret;
1233     }
1234    
1235     IndexVector Brick::getNumFacesPerBoundary() const
1236     {
1237     IndexVector ret(6, 0);
1238     //left
1239     if (m_offset0==0)
1240     ret[0]=m_NE1*m_NE2;
1241     //right
1242     if (m_mpiInfo->rank%m_NX==m_NX-1)
1243     ret[1]=m_NE1*m_NE2;
1244     //bottom
1245     if (m_offset1==0)
1246     ret[2]=m_NE0*m_NE2;
1247     //top
1248     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1249     ret[3]=m_NE0*m_NE2;
1250     //front
1251     if (m_offset2==0)
1252     ret[4]=m_NE0*m_NE1;
1253     //back
1254     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1255     ret[5]=m_NE0*m_NE1;
1256     return ret;
1257     }
1258    
1259     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1260     {
1261     if (dim==0)
1262     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1263     else if (dim==1)
1264     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1265     else if (dim==2)
1266     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1267    
1268     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1269     }
1270    
1271     //protected
1272     dim_t Brick::getNumDOF() const
1273     {
1274     return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1275     }
1276    
1277     //protected
1278     dim_t Brick::getNumFaceElements() const
1279     {
1280     const IndexVector faces = getNumFacesPerBoundary();
1281     dim_t n=0;
1282     for (size_t i=0; i<faces.size(); i++)
1283     n+=faces[i];
1284     return n;
1285     }
1286    
1287     //protected
1288     void Brick::assembleCoordinates(escript::Data& arg) const
1289     {
1290     escriptDataC x = arg.getDataC();
1291     int numDim = m_numDim;
1292     if (!isDataPointShapeEqual(&x, 1, &numDim))
1293     throw RipleyException("setToX: Invalid Data object shape");
1294     if (!numSamplesEqual(&x, 1, getNumNodes()))
1295     throw RipleyException("setToX: Illegal number of samples in Data object");
1296    
1297     pair<double,double> xdx = getFirstCoordAndSpacing(0);
1298     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1299     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1300     arg.requireWrite();
1301     #pragma omp parallel for
1302     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1303     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1304     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1305     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1306     point[0] = xdx.first+i0*xdx.second;
1307     point[1] = ydy.first+i1*ydy.second;
1308     point[2] = zdz.first+i2*zdz.second;
1309     }
1310     }
1311     }
1312     }
1313    
1314     //protected
1315     dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1316     {
1317     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1318     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1319     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1320     const int x=node%nDOF0;
1321     const int y=node%(nDOF0*nDOF1)/nDOF0;
1322     const int z=node/(nDOF0*nDOF1);
1323     int num=0;
1324     // loop through potential neighbours and add to index if positions are
1325     // within bounds
1326     for (int i2=-1; i2<2; i2++) {
1327     for (int i1=-1; i1<2; i1++) {
1328     for (int i0=-1; i0<2; i0++) {
1329     // skip node itself
1330     if (i0==0 && i1==0 && i2==0)
1331     continue;
1332     // location of neighbour node
1333     const int nx=x+i0;
1334     const int ny=y+i1;
1335     const int nz=z+i2;
1336     if (nx>=0 && ny>=0 && nz>=0
1337     && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1338     index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1339     num++;
1340     }
1341     }
1342     }
1343     }
1344    
1345     return num;
1346     }
1347    
1348     //protected
1349     void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1350     {
1351     const dim_t numComp = in.getDataPointSize();
1352     out.requireWrite();
1353    
1354 caltinay 3754 const index_t left = (m_offset0==0 ? 0 : 1);
1355     const index_t bottom = (m_offset1==0 ? 0 : 1);
1356     const index_t front = (m_offset2==0 ? 0 : 1);
1357 caltinay 3756 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1358     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1359     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1360     #pragma omp parallel for
1361     for (index_t i=0; i<nDOF2; i++) {
1362     for (index_t j=0; j<nDOF1; j++) {
1363     for (index_t k=0; k<nDOF0; k++) {
1364     const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1365     const double* src=in.getSampleDataRO(n);
1366     copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1367     }
1368     }
1369     }
1370     }
1371 caltinay 3754
1372 caltinay 3756 //protected
1373     void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1374     {
1375     const dim_t numComp = in.getDataPointSize();
1376     Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1377     in.requireWrite();
1378     Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1379    
1380     const dim_t numDOF = getNumDOF();
1381     out.requireWrite();
1382     const double* buffer = Paso_Coupler_finishCollect(coupler);
1383    
1384     #pragma omp parallel for
1385     for (index_t i=0; i<getNumNodes(); i++) {
1386     const double* src=(m_dofMap[i]<numDOF ?
1387     in.getSampleDataRO(m_dofMap[i])
1388     : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1389     copy(src, src+numComp, out.getSampleDataRW(i));
1390     }
1391     }
1392    
1393     //private
1394     void Brick::populateSampleIds()
1395     {
1396     // identifiers are ordered from left to right, bottom to top, front to back
1397     // globally
1398    
1399     // build node distribution vector first.
1400     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1401     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1402     const dim_t numDOF=getNumDOF();
1403     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1404     m_nodeDistribution[k]=k*numDOF;
1405     }
1406     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1407     m_nodeId.resize(getNumNodes());
1408     m_dofId.resize(numDOF);
1409     m_elementId.resize(getNumElements());
1410     m_faceId.resize(getNumFaceElements());
1411    
1412     #pragma omp parallel
1413     {
1414     #pragma omp for nowait
1415     // nodes
1416     for (dim_t i2=0; i2<m_N2; i2++) {
1417     for (dim_t i1=0; i1<m_N1; i1++) {
1418     for (dim_t i0=0; i0<m_N0; i0++) {
1419     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1420     (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1421     +(m_offset1+i1)*(m_gNE0+1)
1422     +m_offset0+i0;
1423     }
1424     }
1425     }
1426    
1427     // degrees of freedom
1428     #pragma omp for nowait
1429     for (dim_t k=0; k<numDOF; k++)
1430     m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1431    
1432     // elements
1433     #pragma omp for nowait
1434     for (dim_t i2=0; i2<m_NE2; i2++) {
1435     for (dim_t i1=0; i1<m_NE1; i1++) {
1436     for (dim_t i0=0; i0<m_NE0; i0++) {
1437     m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1438     (m_offset2+i2)*m_gNE0*m_gNE1
1439     +(m_offset1+i1)*m_gNE0
1440     +m_offset0+i0;
1441     }
1442     }
1443     }
1444    
1445     // face elements
1446     #pragma omp for
1447     for (dim_t k=0; k<getNumFaceElements(); k++)
1448     m_faceId[k]=k;
1449     } // end parallel section
1450    
1451     m_nodeTags.assign(getNumNodes(), 0);
1452     updateTagsInUse(Nodes);
1453    
1454     m_elementTags.assign(getNumElements(), 0);
1455     updateTagsInUse(Elements);
1456    
1457     // generate face offset vector and set face tags
1458     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1459     const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1460     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1461     m_faceOffset.assign(facesPerEdge.size(), -1);
1462     m_faceTags.clear();
1463     index_t offset=0;
1464     for (size_t i=0; i<facesPerEdge.size(); i++) {
1465     if (facesPerEdge[i]>0) {
1466     m_faceOffset[i]=offset;
1467     offset+=facesPerEdge[i];
1468     m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1469     }
1470     }
1471     setTagMap("left", LEFT);
1472     setTagMap("right", RIGHT);
1473     setTagMap("bottom", BOTTOM);
1474     setTagMap("top", TOP);
1475     setTagMap("front", FRONT);
1476     setTagMap("back", BACK);
1477     updateTagsInUse(FaceElements);
1478     }
1479    
1480     //private
1481     void Brick::createPattern()
1482     {
1483     const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1484     const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1485     const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1486     const index_t left = (m_offset0==0 ? 0 : 1);
1487     const index_t bottom = (m_offset1==0 ? 0 : 1);
1488     const index_t front = (m_offset2==0 ? 0 : 1);
1489    
1490     // populate node->DOF mapping with own degrees of freedom.
1491     // The rest is assigned in the loop further down
1492 caltinay 3754 m_dofMap.assign(getNumNodes(), 0);
1493     #pragma omp parallel for
1494     for (index_t i=front; i<m_N2; i++) {
1495     for (index_t j=bottom; j<m_N1; j++) {
1496     for (index_t k=left; k<m_N0; k++) {
1497     m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1498     }
1499     }
1500     }
1501    
1502     // build list of shared components and neighbours by looping through
1503     // all potential neighbouring ranks and checking if positions are
1504     // within bounds
1505 caltinay 3756 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1506     vector<IndexVector> colIndices(numDOF); // for the couple blocks
1507     RankVector neighbour;
1508     IndexVector offsetInShared(1,0);
1509     IndexVector sendShared, recvShared;
1510     int numShared=0;
1511 caltinay 3754 const int x=m_mpiInfo->rank%m_NX;
1512     const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1513     const int z=m_mpiInfo->rank/(m_NX*m_NY);
1514     for (int i2=-1; i2<2; i2++) {
1515     for (int i1=-1; i1<2; i1++) {
1516     for (int i0=-1; i0<2; i0++) {
1517 caltinay 3756 // skip this rank
1518 caltinay 3754 if (i0==0 && i1==0 && i2==0)
1519     continue;
1520     // location of neighbour rank
1521     const int nx=x+i0;
1522     const int ny=y+i1;
1523     const int nz=z+i2;
1524     if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1525     neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1526     if (i0==0 && i1==0) {
1527     // sharing front or back plane
1528     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1529     for (dim_t i=0; i<nDOF1; i++) {
1530 caltinay 3755 const int firstDOF=(i2==-1 ? i*nDOF0
1531     : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1532     const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1533     : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1534 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1535     sendShared.push_back(firstDOF+j);
1536     recvShared.push_back(numDOF+numShared);
1537     if (j>0) {
1538     if (i>0)
1539     colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1540     colIndices[firstDOF+j-1].push_back(numShared);
1541     if (i<nDOF1-1)
1542     colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1543     }
1544     if (i>0)
1545     colIndices[firstDOF+j-nDOF0].push_back(numShared);
1546     colIndices[firstDOF+j].push_back(numShared);
1547     if (i<nDOF1-1)
1548     colIndices[firstDOF+j+nDOF0].push_back(numShared);
1549     if (j<nDOF0-1) {
1550     if (i>0)
1551     colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1552     colIndices[firstDOF+j+1].push_back(numShared);
1553     if (i<nDOF1-1)
1554     colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1555     }
1556     m_dofMap[firstNode+j]=numDOF+numShared;
1557     }
1558     }
1559     } else if (i0==0 && i2==0) {
1560     // sharing top or bottom plane
1561     offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1562     for (dim_t i=0; i<nDOF2; i++) {
1563 caltinay 3755 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1564     : nDOF0*((i+1)*nDOF1-1));
1565     const int firstNode=(i1==-1 ?
1566     left+(i+front)*m_N0*m_N1
1567     : left+m_N0*((i+1+front)*m_N1-1));
1568 caltinay 3754 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1569     sendShared.push_back(firstDOF+j);
1570     recvShared.push_back(numDOF+numShared);
1571     if (j>0) {
1572     if (i>0)
1573     colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1574     colIndices[firstDOF+j-1].push_back(numShared);
1575     if (i<nDOF2-1)
1576     colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1577     }
1578     if (i>0)
1579     colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1580     colIndices[firstDOF+j].push_back(numShared);
1581     if (i<nDOF2-1)
1582     colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1583     if (j<nDOF0-1) {
1584     if (i>0)
1585     colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1586     colIndices[firstDOF+j+1].push_back(numShared);
1587     if (i<nDOF2-1)
1588     colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1589     }
1590     m_dofMap[firstNode+j]=numDOF+numShared;
1591     }
1592     }
1593     } else if (i1==0 && i2==0) {
1594     // sharing left or right plane
1595     offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1596     for (dim_t i=0; i<nDOF2; i++) {
1597 caltinay 3755 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1598     : nDOF0*(1+i*nDOF1)-1);
1599     const int firstNode=(i0==-1 ?
1600     (bottom+(i+front)*m_N1)*m_N0
1601     : (bottom+1+(i+front)*m_N1)*m_N0-1);
1602 caltinay 3754 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1603     sendShared