/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3753 - (show annotations)
Tue Jan 3 09:01:49 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 91022 byte(s)
Took over Node->DOF interpolation to Brick and parallelised some loops.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 }
85
86
87 Brick::~Brick()
88 {
89 }
90
91 string Brick::getDescription() const
92 {
93 return "ripley::Brick";
94 }
95
96 bool Brick::operator==(const AbstractDomain& other) const
97 {
98 const Brick* o=dynamic_cast<const Brick*>(&other);
99 if (o) {
100 return (RipleyDomain::operator==(other) &&
101 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
102 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
103 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
104 }
105
106 return false;
107 }
108
109 void Brick::dump(const string& fileName) const
110 {
111 #if USE_SILO
112 string fn(fileName);
113 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
114 fn+=".silo";
115 }
116
117 const int NUM_SILO_FILES = 1;
118 const char* blockDirFmt = "/block%04d";
119 int driver=DB_HDF5;
120 string siloPath;
121 DBfile* dbfile = NULL;
122
123 #ifdef ESYS_MPI
124 PMPIO_baton_t* baton = NULL;
125 #endif
126
127 if (m_mpiInfo->size > 1) {
128 #ifdef ESYS_MPI
129 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
130 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
131 PMPIO_DefaultClose, (void*)&driver);
132 // try the fallback driver in case of error
133 if (!baton && driver != DB_PDB) {
134 driver = DB_PDB;
135 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
136 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
137 PMPIO_DefaultClose, (void*)&driver);
138 }
139 if (baton) {
140 char str[64];
141 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
142 siloPath = str;
143 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
144 }
145 #endif
146 } else {
147 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
148 getDescription().c_str(), driver);
149 // try the fallback driver in case of error
150 if (!dbfile && driver != DB_PDB) {
151 driver = DB_PDB;
152 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
153 getDescription().c_str(), driver);
154 }
155 }
156
157 if (!dbfile)
158 throw RipleyException("dump: Could not create Silo file");
159
160 /*
161 if (driver==DB_HDF5) {
162 // gzip level 1 already provides good compression with minimal
163 // performance penalty. Some tests showed that gzip levels >3 performed
164 // rather badly on escript data both in terms of time and space
165 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
166 }
167 */
168
169 boost::scoped_ptr<double> x(new double[m_N0]);
170 boost::scoped_ptr<double> y(new double[m_N1]);
171 boost::scoped_ptr<double> z(new double[m_N2]);
172 double* coords[3] = { x.get(), y.get(), z.get() };
173 pair<double,double> xdx = getFirstCoordAndSpacing(0);
174 pair<double,double> ydy = getFirstCoordAndSpacing(1);
175 pair<double,double> zdz = getFirstCoordAndSpacing(2);
176 #pragma omp parallel
177 {
178 #pragma omp for
179 for (dim_t i0 = 0; i0 < m_N0; i0++) {
180 coords[0][i0]=xdx.first+i0*xdx.second;
181 }
182 #pragma omp for
183 for (dim_t i1 = 0; i1 < m_N1; i1++) {
184 coords[1][i1]=ydy.first+i1*ydy.second;
185 }
186 #pragma omp for
187 for (dim_t i2 = 0; i2 < m_N2; i2++) {
188 coords[2][i2]=zdz.first+i2*zdz.second;
189 }
190 }
191 IndexVector dims = getNumNodesPerDim();
192 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
193 DB_COLLINEAR, NULL);
194
195 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
196 NULL, 0, DB_INT, DB_NODECENT, NULL);
197
198 // write element ids
199 dims = getNumElementsPerDim();
200 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
201 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
202
203 // rank 0 writes multimesh and multivar
204 if (m_mpiInfo->rank == 0) {
205 vector<string> tempstrings;
206 vector<char*> names;
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
214 DBSetDir(dbfile, "/");
215 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
216 &types[0], NULL);
217 tempstrings.clear();
218 names.clear();
219 for (dim_t i=0; i<m_mpiInfo->size; i++) {
220 stringstream path;
221 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
222 tempstrings.push_back(path.str());
223 names.push_back((char*)tempstrings.back().c_str());
224 }
225 types.assign(m_mpiInfo->size, DB_QUADVAR);
226 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
227 &types[0], NULL);
228 tempstrings.clear();
229 names.clear();
230 for (dim_t i=0; i<m_mpiInfo->size; i++) {
231 stringstream path;
232 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
233 tempstrings.push_back(path.str());
234 names.push_back((char*)tempstrings.back().c_str());
235 }
236 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
237 &types[0], NULL);
238 }
239
240 if (m_mpiInfo->size > 1) {
241 #ifdef ESYS_MPI
242 PMPIO_HandOffBaton(baton, dbfile);
243 PMPIO_Finish(baton);
244 #endif
245 } else {
246 DBClose(dbfile);
247 }
248
249 #else // USE_SILO
250 throw RipleyException("dump(): no Silo support");
251 #endif
252 }
253
254 const int* Brick::borrowSampleReferenceIDs(int fsType) const
255 {
256 switch (fsType) {
257 case Nodes:
258 case ReducedNodes: //FIXME: reduced
259 return &m_nodeId[0];
260 case DegreesOfFreedom: //FIXME
261 case ReducedDegreesOfFreedom: //FIXME
262 return &m_dofId[0];
263 case Elements:
264 case ReducedElements:
265 return &m_elementId[0];
266 case ReducedFaceElements:
267 return &m_faceId[0];
268 default:
269 break;
270 }
271
272 stringstream msg;
273 msg << "borrowSampleReferenceIDs() not implemented for function space type "
274 << fsType;
275 throw RipleyException(msg.str());
276 }
277
278 bool Brick::ownSample(int fsCode, index_t id) const
279 {
280 #ifdef ESYS_MPI
281 if (fsCode == Nodes) {
282 const index_t left = (m_offset0==0 ? 0 : 1);
283 const index_t bottom = (m_offset1==0 ? 0 : 1);
284 const index_t front = (m_offset2==0 ? 0 : 1);
285 const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
286 const index_t top = (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1 ? m_N1 : m_N1-1);
287 const index_t back = (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1 ? m_N2 : m_N2-1);
288 const index_t x=id%m_N0;
289 const index_t y=id%(m_N0*m_N1)/m_N0;
290 const index_t z=id/(m_N0*m_N1);
291 return (x>=left && x<right && y>=bottom && y<top && z>=front && z<back);
292
293 } else {
294 stringstream msg;
295 msg << "ownSample() not implemented for "
296 << functionSpaceTypeAsString(fsCode);
297 throw RipleyException(msg.str());
298 }
299 #else
300 return true;
301 #endif
302 }
303
304 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
305 {
306 escript::Data& in = *const_cast<escript::Data*>(&cIn);
307 const dim_t numComp = in.getDataPointSize();
308 const double h0 = m_l0/m_gNE0;
309 const double h1 = m_l1/m_gNE1;
310 const double h2 = m_l1/m_gNE2;
311 const double C0 = .044658198738520451079;
312 const double C1 = .16666666666666666667;
313 const double C2 = .21132486540518711775;
314 const double C3 = .25;
315 const double C4 = .5;
316 const double C5 = .62200846792814621559;
317 const double C6 = .78867513459481288225;
318
319 if (out.getFunctionSpace().getTypeCode() == Elements) {
320 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
321 #pragma omp parallel for
322 for (index_t k2=0; k2 < m_NE2; ++k2) {
323 for (index_t k1=0; k1 < m_NE1; ++k1) {
324 for (index_t k0=0; k0 < m_NE0; ++k0) {
325 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
326 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
327 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
328 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
329 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
330 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
331 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
332 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
333 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
334 for (index_t i=0; i < numComp; ++i) {
335 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
336 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
337 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
338 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
339 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
340 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
341 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
342 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
343 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
344 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
345 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
346 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
347 o[INDEX3(i,0,0,numComp,3)] = V0;
348 o[INDEX3(i,1,0,numComp,3)] = V4;
349 o[INDEX3(i,2,0,numComp,3)] = V8;
350 o[INDEX3(i,0,1,numComp,3)] = V0;
351 o[INDEX3(i,1,1,numComp,3)] = V5;
352 o[INDEX3(i,2,1,numComp,3)] = V9;
353 o[INDEX3(i,0,2,numComp,3)] = V1;
354 o[INDEX3(i,1,2,numComp,3)] = V4;
355 o[INDEX3(i,2,2,numComp,3)] = V10;
356 o[INDEX3(i,0,3,numComp,3)] = V1;
357 o[INDEX3(i,1,3,numComp,3)] = V5;
358 o[INDEX3(i,2,3,numComp,3)] = V11;
359 o[INDEX3(i,0,4,numComp,3)] = V2;
360 o[INDEX3(i,1,4,numComp,3)] = V6;
361 o[INDEX3(i,2,4,numComp,3)] = V8;
362 o[INDEX3(i,0,5,numComp,3)] = V2;
363 o[INDEX3(i,1,5,numComp,3)] = V7;
364 o[INDEX3(i,2,5,numComp,3)] = V9;
365 o[INDEX3(i,0,6,numComp,3)] = V3;
366 o[INDEX3(i,1,6,numComp,3)] = V6;
367 o[INDEX3(i,2,6,numComp,3)] = V10;
368 o[INDEX3(i,0,7,numComp,3)] = V3;
369 o[INDEX3(i,1,7,numComp,3)] = V7;
370 o[INDEX3(i,2,7,numComp,3)] = V11;
371 } /* end of component loop i */
372 } /* end of k0 loop */
373 } /* end of k1 loop */
374 } /* end of k2 loop */
375 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378 #pragma omp parallel for
379 for (index_t k2=0; k2 < m_NE2; ++k2) {
380 for (index_t k1=0; k1 < m_NE1; ++k1) {
381 for (index_t k0=0; k0 < m_NE0; ++k0) {
382 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
383 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
384 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
385 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
386 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
387 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
388 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
389 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
390 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
391 for (index_t i=0; i < numComp; ++i) {
392 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
393 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
394 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
395 } /* end of component loop i */
396 } /* end of k0 loop */
397 } /* end of k1 loop */
398 } /* end of k2 loop */
399 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
400 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
401 /*** GENERATOR SNIP_GRAD_FACES TOP */
402 #pragma omp parallel
403 {
404 if (m_faceOffset[0] > -1) {
405 #pragma omp for nowait
406 for (index_t k2=0; k2 < m_NE2; ++k2) {
407 for (index_t k1=0; k1 < m_NE1; ++k1) {
408 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
409 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
410 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
411 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
412 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
413 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
414 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
415 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
416 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
417 for (index_t i=0; i < numComp; ++i) {
418 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
419 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
420 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
421 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
422 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423 o[INDEX3(i,1,0,numComp,3)] = V0;
424 o[INDEX3(i,2,0,numComp,3)] = V2;
425 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426 o[INDEX3(i,1,1,numComp,3)] = V0;
427 o[INDEX3(i,2,1,numComp,3)] = V3;
428 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429 o[INDEX3(i,1,2,numComp,3)] = V1;
430 o[INDEX3(i,2,2,numComp,3)] = V2;
431 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
432 o[INDEX3(i,1,3,numComp,3)] = V1;
433 o[INDEX3(i,2,3,numComp,3)] = V3;
434 } /* end of component loop i */
435 } /* end of k1 loop */
436 } /* end of k2 loop */
437 } /* end of face 0 */
438 if (m_faceOffset[1] > -1) {
439 #pragma omp for nowait
440 for (index_t k2=0; k2 < m_NE2; ++k2) {
441 for (index_t k1=0; k1 < m_NE1; ++k1) {
442 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
443 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
444 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
445 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
446 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
447 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
448 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
449 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
450 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
451 for (index_t i=0; i < numComp; ++i) {
452 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
453 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
454 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
455 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
456 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457 o[INDEX3(i,1,0,numComp,3)] = V0;
458 o[INDEX3(i,2,0,numComp,3)] = V2;
459 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460 o[INDEX3(i,1,1,numComp,3)] = V0;
461 o[INDEX3(i,2,1,numComp,3)] = V3;
462 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463 o[INDEX3(i,1,2,numComp,3)] = V1;
464 o[INDEX3(i,2,2,numComp,3)] = V2;
465 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466 o[INDEX3(i,1,3,numComp,3)] = V1;
467 o[INDEX3(i,2,3,numComp,3)] = V3;
468 } /* end of component loop i */
469 } /* end of k1 loop */
470 } /* end of k2 loop */
471 } /* end of face 1 */
472 if (m_faceOffset[2] > -1) {
473 #pragma omp for nowait
474 for (index_t k2=0; k2 < m_NE2; ++k2) {
475 for (index_t k0=0; k0 < m_NE0; ++k0) {
476 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
477 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
478 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
479 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
480 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
481 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
482 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
483 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
484 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
485 for (index_t i=0; i < numComp; ++i) {
486 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
487 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
488 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
489 o[INDEX3(i,0,0,numComp,3)] = V0;
490 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491 o[INDEX3(i,2,0,numComp,3)] = V1;
492 o[INDEX3(i,0,1,numComp,3)] = V0;
493 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494 o[INDEX3(i,2,1,numComp,3)] = V2;
495 o[INDEX3(i,0,2,numComp,3)] = V0;
496 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497 o[INDEX3(i,2,2,numComp,3)] = V1;
498 o[INDEX3(i,0,3,numComp,3)] = V0;
499 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
500 o[INDEX3(i,2,3,numComp,3)] = V2;
501 } /* end of component loop i */
502 } /* end of k0 loop */
503 } /* end of k2 loop */
504 } /* end of face 2 */
505 if (m_faceOffset[3] > -1) {
506 #pragma omp for nowait
507 for (index_t k2=0; k2 < m_NE2; ++k2) {
508 for (index_t k0=0; k0 < m_NE0; ++k0) {
509 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
510 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
511 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
512 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
513 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
514 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
515 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
516 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
517 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
518 for (index_t i=0; i < numComp; ++i) {
519 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
520 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
521 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
522 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
523 o[INDEX3(i,0,0,numComp,3)] = V0;
524 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525 o[INDEX3(i,2,0,numComp,3)] = V2;
526 o[INDEX3(i,0,1,numComp,3)] = V0;
527 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528 o[INDEX3(i,2,1,numComp,3)] = V3;
529 o[INDEX3(i,0,2,numComp,3)] = V1;
530 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531 o[INDEX3(i,2,2,numComp,3)] = V2;
532 o[INDEX3(i,0,3,numComp,3)] = V1;
533 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534 o[INDEX3(i,2,3,numComp,3)] = V3;
535 } /* end of component loop i */
536 } /* end of k0 loop */
537 } /* end of k2 loop */
538 } /* end of face 3 */
539 if (m_faceOffset[4] > -1) {
540 #pragma omp for nowait
541 for (index_t k1=0; k1 < m_NE1; ++k1) {
542 for (index_t k0=0; k0 < m_NE0; ++k0) {
543 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
544 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
545 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
546 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
547 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
548 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
549 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
550 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
551 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
552 for (index_t i=0; i < numComp; ++i) {
553 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
554 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
555 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
556 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
557 o[INDEX3(i,0,0,numComp,3)] = V0;
558 o[INDEX3(i,1,0,numComp,3)] = V2;
559 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 o[INDEX3(i,0,1,numComp,3)] = V0;
561 o[INDEX3(i,1,1,numComp,3)] = V3;
562 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563 o[INDEX3(i,0,2,numComp,3)] = V1;
564 o[INDEX3(i,1,2,numComp,3)] = V2;
565 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566 o[INDEX3(i,0,3,numComp,3)] = V1;
567 o[INDEX3(i,1,3,numComp,3)] = V3;
568 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
569 } /* end of component loop i */
570 } /* end of k0 loop */
571 } /* end of k1 loop */
572 } /* end of face 4 */
573 if (m_faceOffset[5] > -1) {
574 #pragma omp for nowait
575 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 for (index_t k0=0; k0 < m_NE0; ++k0) {
577 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
578 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
579 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
580 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
581 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
582 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
583 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
584 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
585 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
586 for (index_t i=0; i < numComp; ++i) {
587 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
588 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
589 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
590 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
591 o[INDEX3(i,0,0,numComp,3)] = V0;
592 o[INDEX3(i,1,0,numComp,3)] = V2;
593 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 o[INDEX3(i,0,1,numComp,3)] = V0;
595 o[INDEX3(i,1,1,numComp,3)] = V3;
596 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597 o[INDEX3(i,0,2,numComp,3)] = V1;
598 o[INDEX3(i,1,2,numComp,3)] = V2;
599 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600 o[INDEX3(i,0,3,numComp,3)] = V1;
601 o[INDEX3(i,1,3,numComp,3)] = V3;
602 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 } /* end of component loop i */
604 } /* end of k0 loop */
605 } /* end of k1 loop */
606 } /* end of face 5 */
607 } // end of parallel section
608 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
609 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
610 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
611 #pragma omp parallel
612 {
613 if (m_faceOffset[0] > -1) {
614 #pragma omp for nowait
615 for (index_t k2=0; k2 < m_NE2; ++k2) {
616 for (index_t k1=0; k1 < m_NE1; ++k1) {
617 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
618 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
619 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
620 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
621 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
622 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
623 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
624 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
625 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
626 for (index_t i=0; i < numComp; ++i) {
627 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
628 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
629 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
630 } /* end of component loop i */
631 } /* end of k1 loop */
632 } /* end of k2 loop */
633 } /* end of face 0 */
634 if (m_faceOffset[1] > -1) {
635 #pragma omp for nowait
636 for (index_t k2=0; k2 < m_NE2; ++k2) {
637 for (index_t k1=0; k1 < m_NE1; ++k1) {
638 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
639 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
640 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
641 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
642 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
643 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
644 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
645 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
646 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
647 for (index_t i=0; i < numComp; ++i) {
648 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
649 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
650 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
651 } /* end of component loop i */
652 } /* end of k1 loop */
653 } /* end of k2 loop */
654 } /* end of face 1 */
655 if (m_faceOffset[2] > -1) {
656 #pragma omp for nowait
657 for (index_t k2=0; k2 < m_NE2; ++k2) {
658 for (index_t k0=0; k0 < m_NE0; ++k0) {
659 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
660 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
661 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
662 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
663 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
664 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
665 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
666 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
667 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
668 for (index_t i=0; i < numComp; ++i) {
669 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
670 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
671 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
672 } /* end of component loop i */
673 } /* end of k0 loop */
674 } /* end of k2 loop */
675 } /* end of face 2 */
676 if (m_faceOffset[3] > -1) {
677 #pragma omp for nowait
678 for (index_t k2=0; k2 < m_NE2; ++k2) {
679 for (index_t k0=0; k0 < m_NE0; ++k0) {
680 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
681 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
682 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
683 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
684 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
685 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
686 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
687 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
688 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
689 for (index_t i=0; i < numComp; ++i) {
690 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
691 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
692 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
693 } /* end of component loop i */
694 } /* end of k0 loop */
695 } /* end of k2 loop */
696 } /* end of face 3 */
697 if (m_faceOffset[4] > -1) {
698 #pragma omp for nowait
699 for (index_t k1=0; k1 < m_NE1; ++k1) {
700 for (index_t k0=0; k0 < m_NE0; ++k0) {
701 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
702 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
703 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
704 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
705 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
706 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
707 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
708 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
709 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
710 for (index_t i=0; i < numComp; ++i) {
711 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
712 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
713 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
714 } /* end of component loop i */
715 } /* end of k0 loop */
716 } /* end of k1 loop */
717 } /* end of face 4 */
718 if (m_faceOffset[5] > -1) {
719 #pragma omp for nowait
720 for (index_t k1=0; k1 < m_NE1; ++k1) {
721 for (index_t k0=0; k0 < m_NE0; ++k0) {
722 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
723 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
724 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
725 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
726 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
727 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
728 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
729 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
730 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
731 for (index_t i=0; i < numComp; ++i) {
732 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
733 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
734 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
735 } /* end of component loop i */
736 } /* end of k0 loop */
737 } /* end of k1 loop */
738 } /* end of face 5 */
739 } // end of parallel section
740 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
741 } else {
742 stringstream msg;
743 msg << "setToGradient() not implemented for "
744 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
745 throw RipleyException(msg.str());
746 }
747 }
748
749 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
750 {
751 escript::Data& in = *const_cast<escript::Data*>(&arg);
752 const dim_t numComp = in.getDataPointSize();
753 const double h0 = m_l0/m_gNE0;
754 const double h1 = m_l1/m_gNE1;
755 const double h2 = m_l2/m_gNE2;
756 if (arg.getFunctionSpace().getTypeCode() == Elements) {
757 const double w_0 = h0*h1*h2/8.;
758 #pragma omp parallel
759 {
760 vector<double> int_local(numComp, 0);
761 #pragma omp for nowait
762 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
763 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
764 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
765 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
766 for (index_t i=0; i < numComp; ++i) {
767 const register double f_0 = f[INDEX2(i,0,numComp)];
768 const register double f_1 = f[INDEX2(i,1,numComp)];
769 const register double f_2 = f[INDEX2(i,2,numComp)];
770 const register double f_3 = f[INDEX2(i,3,numComp)];
771 const register double f_4 = f[INDEX2(i,4,numComp)];
772 const register double f_5 = f[INDEX2(i,5,numComp)];
773 const register double f_6 = f[INDEX2(i,6,numComp)];
774 const register double f_7 = f[INDEX2(i,7,numComp)];
775 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
776 } /* end of component loop i */
777 } /* end of k0 loop */
778 } /* end of k1 loop */
779 } /* end of k2 loop */
780
781 #pragma omp critical
782 for (index_t i=0; i<numComp; i++)
783 integrals[i]+=int_local[i];
784 } // end of parallel section
785 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
786 const double w_0 = h0*h1*h2;
787 #pragma omp parallel
788 {
789 vector<double> int_local(numComp, 0);
790 #pragma omp for nowait
791 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
792 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
793 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
794 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
795 for (index_t i=0; i < numComp; ++i) {
796 int_local[i]+=f[i]*w_0;
797 } /* end of component loop i */
798 } /* end of k0 loop */
799 } /* end of k1 loop */
800 } /* end of k2 loop */
801
802 #pragma omp critical
803 for (index_t i=0; i<numComp; i++)
804 integrals[i]+=int_local[i];
805 } // end of parallel section
806 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
807 const double w_0 = h1*h2/4.;
808 const double w_1 = h0*h2/4.;
809 const double w_2 = h0*h1/4.;
810 #pragma omp parallel
811 {
812 vector<double> int_local(numComp, 0);
813 if (m_faceOffset[0] > -1) {
814 #pragma omp for nowait
815 for (index_t k2=0; k2 < m_NE2; ++k2) {
816 for (index_t k1=0; k1 < m_NE1; ++k1) {
817 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
818 for (index_t i=0; i < numComp; ++i) {
819 const register double f_0 = f[INDEX2(i,0,numComp)];
820 const register double f_1 = f[INDEX2(i,1,numComp)];
821 const register double f_2 = f[INDEX2(i,2,numComp)];
822 const register double f_3 = f[INDEX2(i,3,numComp)];
823 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
824 } /* end of component loop i */
825 } /* end of k1 loop */
826 } /* end of k2 loop */
827 }
828
829 if (m_faceOffset[1] > -1) {
830 #pragma omp for nowait
831 for (index_t k2=0; k2 < m_NE2; ++k2) {
832 for (index_t k1=0; k1 < m_NE1; ++k1) {
833 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
834 for (index_t i=0; i < numComp; ++i) {
835 const register double f_0 = f[INDEX2(i,0,numComp)];
836 const register double f_1 = f[INDEX2(i,1,numComp)];
837 const register double f_2 = f[INDEX2(i,2,numComp)];
838 const register double f_3 = f[INDEX2(i,3,numComp)];
839 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
840 } /* end of component loop i */
841 } /* end of k1 loop */
842 } /* end of k2 loop */
843 }
844
845 if (m_faceOffset[2] > -1) {
846 #pragma omp for nowait
847 for (index_t k2=0; k2 < m_NE2; ++k2) {
848 for (index_t k0=0; k0 < m_NE0; ++k0) {
849 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
850 for (index_t i=0; i < numComp; ++i) {
851 const register double f_0 = f[INDEX2(i,0,numComp)];
852 const register double f_1 = f[INDEX2(i,1,numComp)];
853 const register double f_2 = f[INDEX2(i,2,numComp)];
854 const register double f_3 = f[INDEX2(i,3,numComp)];
855 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
856 } /* end of component loop i */
857 } /* end of k1 loop */
858 } /* end of k2 loop */
859 }
860
861 if (m_faceOffset[3] > -1) {
862 #pragma omp for nowait
863 for (index_t k2=0; k2 < m_NE2; ++k2) {
864 for (index_t k0=0; k0 < m_NE0; ++k0) {
865 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
866 for (index_t i=0; i < numComp; ++i) {
867 const register double f_0 = f[INDEX2(i,0,numComp)];
868 const register double f_1 = f[INDEX2(i,1,numComp)];
869 const register double f_2 = f[INDEX2(i,2,numComp)];
870 const register double f_3 = f[INDEX2(i,3,numComp)];
871 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
872 } /* end of component loop i */
873 } /* end of k1 loop */
874 } /* end of k2 loop */
875 }
876
877 if (m_faceOffset[4] > -1) {
878 #pragma omp for nowait
879 for (index_t k1=0; k1 < m_NE1; ++k1) {
880 for (index_t k0=0; k0 < m_NE0; ++k0) {
881 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
882 for (index_t i=0; i < numComp; ++i) {
883 const register double f_0 = f[INDEX2(i,0,numComp)];
884 const register double f_1 = f[INDEX2(i,1,numComp)];
885 const register double f_2 = f[INDEX2(i,2,numComp)];
886 const register double f_3 = f[INDEX2(i,3,numComp)];
887 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
888 } /* end of component loop i */
889 } /* end of k1 loop */
890 } /* end of k2 loop */
891 }
892
893 if (m_faceOffset[5] > -1) {
894 #pragma omp for nowait
895 for (index_t k1=0; k1 < m_NE1; ++k1) {
896 for (index_t k0=0; k0 < m_NE0; ++k0) {
897 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
898 for (index_t i=0; i < numComp; ++i) {
899 const register double f_0 = f[INDEX2(i,0,numComp)];
900 const register double f_1 = f[INDEX2(i,1,numComp)];
901 const register double f_2 = f[INDEX2(i,2,numComp)];
902 const register double f_3 = f[INDEX2(i,3,numComp)];
903 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
904 } /* end of component loop i */
905 } /* end of k1 loop */
906 } /* end of k2 loop */
907 }
908
909 #pragma omp critical
910 for (index_t i=0; i<numComp; i++)
911 integrals[i]+=int_local[i];
912 } // end of parallel section
913
914 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
915 const double w_0 = h1*h2;
916 const double w_1 = h0*h2;
917 const double w_2 = h0*h1;
918 #pragma omp parallel
919 {
920 vector<double> int_local(numComp, 0);
921 if (m_faceOffset[0] > -1) {
922 #pragma omp for nowait
923 for (index_t k2=0; k2 < m_NE2; ++k2) {
924 for (index_t k1=0; k1 < m_NE1; ++k1) {
925 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
926 for (index_t i=0; i < numComp; ++i) {
927 int_local[i]+=f[i]*w_0;
928 } /* end of component loop i */
929 } /* end of k1 loop */
930 } /* end of k2 loop */
931 }
932
933 if (m_faceOffset[1] > -1) {
934 #pragma omp for nowait
935 for (index_t k2=0; k2 < m_NE2; ++k2) {
936 for (index_t k1=0; k1 < m_NE1; ++k1) {
937 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
938 for (index_t i=0; i < numComp; ++i) {
939 int_local[i]+=f[i]*w_0;
940 } /* end of component loop i */
941 } /* end of k1 loop */
942 } /* end of k2 loop */
943 }
944
945 if (m_faceOffset[2] > -1) {
946 #pragma omp for nowait
947 for (index_t k2=0; k2 < m_NE2; ++k2) {
948 for (index_t k0=0; k0 < m_NE0; ++k0) {
949 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
950 for (index_t i=0; i < numComp; ++i) {
951 int_local[i]+=f[i]*w_1;
952 } /* end of component loop i */
953 } /* end of k1 loop */
954 } /* end of k2 loop */
955 }
956
957 if (m_faceOffset[3] > -1) {
958 #pragma omp for nowait
959 for (index_t k2=0; k2 < m_NE2; ++k2) {
960 for (index_t k0=0; k0 < m_NE0; ++k0) {
961 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
962 for (index_t i=0; i < numComp; ++i) {
963 int_local[i]+=f[i]*w_1;
964 } /* end of component loop i */
965 } /* end of k1 loop */
966 } /* end of k2 loop */
967 }
968
969 if (m_faceOffset[4] > -1) {
970 #pragma omp for nowait
971 for (index_t k1=0; k1 < m_NE1; ++k1) {
972 for (index_t k0=0; k0 < m_NE0; ++k0) {
973 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
974 for (index_t i=0; i < numComp; ++i) {
975 int_local[i]+=f[i]*w_2;
976 } /* end of component loop i */
977 } /* end of k1 loop */
978 } /* end of k2 loop */
979 }
980
981 if (m_faceOffset[5] > -1) {
982 #pragma omp for nowait
983 for (index_t k1=0; k1 < m_NE1; ++k1) {
984 for (index_t k0=0; k0 < m_NE0; ++k0) {
985 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
986 for (index_t i=0; i < numComp; ++i) {
987 int_local[i]+=f[i]*w_2;
988 } /* end of component loop i */
989 } /* end of k1 loop */
990 } /* end of k2 loop */
991 }
992
993 #pragma omp critical
994 for (index_t i=0; i<numComp; i++)
995 integrals[i]+=int_local[i];
996 } // end of parallel section
997
998 } else {
999 stringstream msg;
1000 msg << "setToIntegrals() not implemented for "
1001 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1002 throw RipleyException(msg.str());
1003 }
1004 }
1005
1006 void Brick::setToNormal(escript::Data& out) const
1007 {
1008 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1009 #pragma omp parallel
1010 {
1011 if (m_faceOffset[0] > -1) {
1012 #pragma omp for nowait
1013 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1014 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1015 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1016 // set vector at four quadrature points
1017 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1018 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1019 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1020 *o++ = -1.; *o++ = 0.; *o = 0.;
1021 }
1022 }
1023 }
1024
1025 if (m_faceOffset[1] > -1) {
1026 #pragma omp for nowait
1027 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1028 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1029 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1030 // set vector at four quadrature points
1031 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1032 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1033 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1034 *o++ = 1.; *o++ = 0.; *o = 0.;
1035 }
1036 }
1037 }
1038
1039 if (m_faceOffset[2] > -1) {
1040 #pragma omp for nowait
1041 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1042 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1043 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1044 // set vector at four quadrature points
1045 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1046 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1047 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1048 *o++ = 0.; *o++ = -1.; *o = 0.;
1049 }
1050 }
1051 }
1052
1053 if (m_faceOffset[3] > -1) {
1054 #pragma omp for nowait
1055 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1056 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1057 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1058 // set vector at four quadrature points
1059 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1060 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1061 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1062 *o++ = 0.; *o++ = 1.; *o = 0.;
1063 }
1064 }
1065 }
1066
1067 if (m_faceOffset[4] > -1) {
1068 #pragma omp for nowait
1069 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1070 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1071 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1072 // set vector at four quadrature points
1073 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1074 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1075 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1076 *o++ = 0.; *o++ = 0.; *o = -1.;
1077 }
1078 }
1079 }
1080
1081 if (m_faceOffset[5] > -1) {
1082 #pragma omp for nowait
1083 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1084 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1085 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1086 // set vector at four quadrature points
1087 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1088 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1089 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1090 *o++ = 0.; *o++ = 0.; *o = 1.;
1091 }
1092 }
1093 }
1094 } // end of parallel section
1095 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1096 #pragma omp parallel
1097 {
1098 if (m_faceOffset[0] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1101 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1102 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1103 *o++ = -1.;
1104 *o++ = 0.;
1105 *o = 0.;
1106 }
1107 }
1108 }
1109
1110 if (m_faceOffset[1] > -1) {
1111 #pragma omp for nowait
1112 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1113 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1114 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1115 *o++ = 1.;
1116 *o++ = 0.;
1117 *o = 0.;
1118 }
1119 }
1120 }
1121
1122 if (m_faceOffset[2] > -1) {
1123 #pragma omp for nowait
1124 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1125 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1126 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1127 *o++ = 0.;
1128 *o++ = -1.;
1129 *o = 0.;
1130 }
1131 }
1132 }
1133
1134 if (m_faceOffset[3] > -1) {
1135 #pragma omp for nowait
1136 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1137 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1138 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1139 *o++ = 0.;
1140 *o++ = 1.;
1141 *o = 0.;
1142 }
1143 }
1144 }
1145
1146 if (m_faceOffset[4] > -1) {
1147 #pragma omp for nowait
1148 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1149 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1150 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1151 *o++ = 0.;
1152 *o++ = 0.;
1153 *o = -1.;
1154 }
1155 }
1156 }
1157
1158 if (m_faceOffset[5] > -1) {
1159 #pragma omp for nowait
1160 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1161 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1162 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1163 *o++ = 0.;
1164 *o++ = 0.;
1165 *o = 1.;
1166 }
1167 }
1168 }
1169 } // end of parallel section
1170
1171 } else {
1172 stringstream msg;
1173 msg << "setToNormal() not implemented for "
1174 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1175 throw RipleyException(msg.str());
1176 }
1177 }
1178
1179 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1180 bool reducedColOrder) const
1181 {
1182 if (reducedRowOrder || reducedColOrder)
1183 throw RipleyException("getPattern() not implemented for reduced order");
1184
1185 throw RipleyException("getPattern() not implemented");
1186 }
1187
1188 void Brick::Print_Mesh_Info(const bool full) const
1189 {
1190 RipleyDomain::Print_Mesh_Info(full);
1191 if (full) {
1192 cout << " Id Coordinates" << endl;
1193 cout.precision(15);
1194 cout.setf(ios::scientific, ios::floatfield);
1195 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1196 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1197 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1198 for (index_t i=0; i < getNumNodes(); i++) {
1199 cout << " " << setw(5) << m_nodeId[i]
1200 << " " << xdx.first+(i%m_N0)*xdx.second
1201 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1202 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1203 }
1204 }
1205 }
1206
1207 IndexVector Brick::getNumNodesPerDim() const
1208 {
1209 IndexVector ret;
1210 ret.push_back(m_N0);
1211 ret.push_back(m_N1);
1212 ret.push_back(m_N2);
1213 return ret;
1214 }
1215
1216 IndexVector Brick::getNumElementsPerDim() const
1217 {
1218 IndexVector ret;
1219 ret.push_back(m_NE0);
1220 ret.push_back(m_NE1);
1221 ret.push_back(m_NE2);
1222 return ret;
1223 }
1224
1225 IndexVector Brick::getNumFacesPerBoundary() const
1226 {
1227 IndexVector ret(6, 0);
1228 //left
1229 if (m_offset0==0)
1230 ret[0]=m_NE1*m_NE2;
1231 //right
1232 if (m_mpiInfo->rank%m_NX==m_NX-1)
1233 ret[1]=m_NE1*m_NE2;
1234 //bottom
1235 if (m_offset1==0)
1236 ret[2]=m_NE0*m_NE2;
1237 //top
1238 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1239 ret[3]=m_NE0*m_NE2;
1240 //front
1241 if (m_offset2==0)
1242 ret[4]=m_NE0*m_NE1;
1243 //back
1244 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1245 ret[5]=m_NE0*m_NE1;
1246 return ret;
1247 }
1248
1249 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1250 {
1251 if (dim==0)
1252 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1253 else if (dim==1)
1254 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1255 else if (dim==2)
1256 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1257
1258 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1259 }
1260
1261 //protected
1262 dim_t Brick::getNumDOF() const
1263 {
1264 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1265 }
1266
1267 //protected
1268 dim_t Brick::getNumFaceElements() const
1269 {
1270 const IndexVector faces = getNumFacesPerBoundary();
1271 dim_t n=0;
1272 for (size_t i=0; i<faces.size(); i++)
1273 n+=faces[i];
1274 return n;
1275 }
1276
1277 //protected
1278 void Brick::assembleCoordinates(escript::Data& arg) const
1279 {
1280 escriptDataC x = arg.getDataC();
1281 int numDim = m_numDim;
1282 if (!isDataPointShapeEqual(&x, 1, &numDim))
1283 throw RipleyException("setToX: Invalid Data object shape");
1284 if (!numSamplesEqual(&x, 1, getNumNodes()))
1285 throw RipleyException("setToX: Illegal number of samples in Data object");
1286
1287 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1288 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1289 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1290 arg.requireWrite();
1291 #pragma omp parallel for
1292 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1293 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1294 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1295 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1296 point[0] = xdx.first+i0*xdx.second;
1297 point[1] = ydy.first+i1*ydy.second;
1298 point[2] = zdz.first+i2*zdz.second;
1299 }
1300 }
1301 }
1302 }
1303
1304 //private
1305 void Brick::populateSampleIds()
1306 {
1307 // identifiers are ordered from left to right, bottom to top, front to back
1308 // globally
1309
1310 // build node distribution vector first.
1311 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1312 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1313 const dim_t numDOF=getNumDOF();
1314 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1315 m_nodeDistribution[k]=k*numDOF;
1316 }
1317 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1318 m_nodeId.resize(getNumNodes());
1319 m_dofId.resize(numDOF);
1320 m_elementId.resize(getNumElements());
1321 m_faceId.resize(getNumFaceElements());
1322
1323 #pragma omp parallel
1324 {
1325 #pragma omp for nowait
1326 // nodes
1327 for (dim_t i2=0; i2<m_N2; i2++) {
1328 for (dim_t i1=0; i1<m_N1; i1++) {
1329 for (dim_t i0=0; i0<m_N0; i0++) {
1330 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1331 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1332 +(m_offset1+i1)*(m_gNE0+1)
1333 +m_offset0+i0;
1334 }
1335 }
1336 }
1337
1338 // degrees of freedom
1339 #pragma omp for nowait
1340 for (dim_t k=0; k<numDOF; k++)
1341 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1342
1343 // elements
1344 #pragma omp for nowait
1345 for (dim_t k=0; k<getNumElements(); k++)
1346 m_elementId[k]=k;
1347
1348 // face elements
1349 #pragma omp for
1350 for (dim_t k=0; k<getNumFaceElements(); k++)
1351 m_faceId[k]=k;
1352 } // end parallel section
1353
1354 m_nodeTags.assign(getNumNodes(), 0);
1355 updateTagsInUse(Nodes);
1356
1357 m_elementTags.assign(getNumElements(), 0);
1358 updateTagsInUse(Elements);
1359
1360 // generate face offset vector and set face tags
1361 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1362 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1363 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1364 m_faceOffset.assign(facesPerEdge.size(), -1);
1365 m_faceTags.clear();
1366 index_t offset=0;
1367 for (size_t i=0; i<facesPerEdge.size(); i++) {
1368 if (facesPerEdge[i]>0) {
1369 m_faceOffset[i]=offset;
1370 offset+=facesPerEdge[i];
1371 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1372 }
1373 }
1374 setTagMap("left", LEFT);
1375 setTagMap("right", RIGHT);
1376 setTagMap("bottom", BOTTOM);
1377 setTagMap("top", TOP);
1378 setTagMap("front", FRONT);
1379 setTagMap("back", BACK);
1380 updateTagsInUse(FaceElements);
1381 }
1382
1383 //protected
1384 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1385 bool reduced) const
1386 {
1387 const dim_t numComp = in.getDataPointSize();
1388 if (reduced) {
1389 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1390 const double c0 = .125;
1391 #pragma omp parallel for
1392 for (index_t k2=0; k2 < m_NE2; ++k2) {
1393 for (index_t k1=0; k1 < m_NE1; ++k1) {
1394 for (index_t k0=0; k0 < m_NE0; ++k0) {
1395 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1396 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1397 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1398 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1399 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1400 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1401 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1402 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1403 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1404 for (index_t i=0; i < numComp; ++i) {
1405 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1406 } /* end of component loop i */
1407 } /* end of k0 loop */
1408 } /* end of k1 loop */
1409 } /* end of k2 loop */
1410 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1411 } else {
1412 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1413 const double c0 = .0094373878376559314545;
1414 const double c1 = .035220810900864519624;
1415 const double c2 = .13144585576580214704;
1416 const double c3 = .49056261216234406855;
1417 #pragma omp parallel for
1418 for (index_t k2=0; k2 < m_NE2; ++k2) {
1419 for (index_t k1=0; k1 < m_NE1; ++k1) {
1420 for (index_t k0=0; k0 < m_NE0; ++k0) {
1421 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1422 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1423 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1424 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1425 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1426 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1427 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1428 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1429 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1430 for (index_t i=0; i < numComp; ++i) {
1431 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1432 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1433 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1434 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1435 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1436 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1437 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1438 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1439 } /* end of component loop i */
1440 } /* end of k0 loop */
1441 } /* end of k1 loop */
1442 } /* end of k2 loop */
1443 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1444 }
1445 }
1446
1447 //protected
1448 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1449 bool reduced) const
1450 {
1451 const dim_t numComp = in.getDataPointSize();
1452 if (reduced) {
1453 const double c0 = .25;
1454 #pragma omp parallel
1455 {
1456 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1457 if (m_faceOffset[0] > -1) {
1458 #pragma omp for nowait
1459 for (index_t k2=0; k2 < m_NE2; ++k2) {
1460 for (index_t k1=0; k1 < m_NE1; ++k1) {
1461 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1462 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1463 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1464 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1465 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1466 for (index_t i=0; i < numComp; ++i) {
1467 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1468 } /* end of component loop i */
1469 } /* end of k1 loop */
1470 } /* end of k2 loop */
1471 } /* end of face 0 */
1472 if (m_faceOffset[1] > -1) {
1473 #pragma omp for nowait
1474 for (index_t k2=0; k2 < m_NE2; ++k2) {
1475 for (index_t k1=0; k1 < m_NE1; ++k1) {
1476 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1477 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1478 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1479 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1480 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1481 for (index_t i=0; i < numComp; ++i) {
1482 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1483 } /* end of component loop i */
1484 } /* end of k1 loop */
1485 } /* end of k2 loop */
1486 } /* end of face 1 */
1487 if (m_faceOffset[2] > -1) {
1488 #pragma omp for nowait
1489 for (index_t k2=0; k2 < m_NE2; ++k2) {
1490 for (index_t k0=0; k0 < m_NE0; ++k0) {
1491 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1492 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1493 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1494 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1495 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1496 for (index_t i=0; i < numComp; ++i) {
1497 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1498 } /* end of component loop i */
1499 } /* end of k0 loop */
1500 } /* end of k2 loop */
1501 } /* end of face 2 */
1502 if (m_faceOffset[3] > -1) {
1503 #pragma omp for nowait
1504 for (index_t k2=0; k2 < m_NE2; ++k2) {
1505 for (index_t k0=0; k0 < m_NE0; ++k0) {
1506 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1507 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1508 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1509 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1510 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1511 for (index_t i=0; i < numComp; ++i) {
1512 o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1513 } /* end of component loop i */
1514 } /* end of k0 loop */
1515 } /* end of k2 loop */
1516 } /* end of face 3 */
1517 if (m_faceOffset[4] > -1) {
1518 #pragma omp for nowait
1519 for (index_t k1=0; k1 < m_NE1; ++k1) {
1520 for (index_t k0=0; k0 < m_NE0; ++k0) {
1521 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1522 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1523 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1524 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1525 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1526 for (index_t i=0; i < numComp; ++i) {
1527 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1528 } /* end of component loop i */
1529 } /* end of k0 loop */
1530 } /* end of k1 loop */
1531 } /* end of face 4 */
1532 if (m_faceOffset[5] > -1) {
1533 #pragma omp for nowait
1534 for (index_t k1=0; k1 < m_NE1; ++k1) {
1535 for (index_t k0=0; k0 < m_NE0; ++k0) {
1536 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1537 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1538 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1539 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1540 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1541 for (index_t i=0; i < numComp; ++i) {
1542 o[INDEX2(i,numComp,0)] = c0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1543 } /* end of component loop i */
1544 } /* end of k0 loop */
1545 } /* end of k1 loop */
1546 } /* end of face 5 */
1547 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1548 } // end of parallel section
1549 } else {
1550 const double c0 = 0.044658198738520451079;
1551 const double c1 = 0.16666666666666666667;
1552 const double c2 = 0.62200846792814621559;
1553 #pragma omp parallel
1554 {
1555 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1556 if (m_faceOffset[0] > -1) {
1557 #pragma omp for nowait
1558 for (index_t k2=0; k2 < m_NE2; ++k2) {
1559 for (index_t k1=0; k1 < m_NE1; ++k1) {
1560 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1561 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1562 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1563 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1564 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1565 for (index_t i=0; i < numComp; ++i) {
1566 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_011[i]*c0 + c1*(f_001[i] + f_010[i]);
1567 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_010[i]*c2 + c1*(f_000[i] + f_011[i]);
1568 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_010[i]*c0 + c1*(f_000[i] + f_011[i]);
1569 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_011[i]*c2 + c1*(f_001[i] + f_010[i]);
1570 } /* end of component loop i */
1571 } /* end of k1 loop */
1572 } /* end of k2 loop */
1573 } /* end of face 0 */
1574 if (m_faceOffset[1] > -1) {
1575 #pragma omp for nowait
1576 for (index_t k2=0; k2 < m_NE2; ++k2) {
1577 for (index_t k1=0; k1 < m_NE1; ++k1) {
1578 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1579 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1580 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1581 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1582 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1583 for (index_t i=0; i < numComp; ++i) {
1584 o[INDEX2(i,numComp,0)] = f_100[i]*c2 + f_111[i]*c0 + c1*(f_101[i] + f_110[i]);
1585 o[INDEX2(i,numComp,1)] = f_101[i]*c0 + f_110[i]*c2 + c1*(f_100[i] + f_111[i]);
1586 o[INDEX2(i,numComp,2)] = f_101[i]*c2 + f_110[i]*c0 + c1*(f_100[i] + f_111[i]);
1587 o[INDEX2(i,numComp,3)] = f_100[i]*c0 + f_111[i]*c2 + c1*(f_101[i] + f_110[i]);
1588 } /* end of component loop i */
1589 } /* end of k1 loop */
1590 } /* end of k2 loop */
1591 } /* end of face 1 */
1592 if (m_faceOffset[2] > -1) {
1593 #pragma omp for nowait
1594 for (index_t k2=0; k2 < m_NE2; ++k2) {
1595 for (index_t k0=0; k0 < m_NE0; ++k0) {
1596 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1597 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1598 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1599 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1600 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1601 for (index_t i=0; i < numComp; ++i) {
1602 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_100[i]);
1603 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_101[i]);
1604 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_101[i]);
1605 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_100[i]);
1606 } /* end of component loop i */
1607 } /* end of k0 loop */
1608 } /* end of k2 loop */
1609 } /* end of face 2 */
1610 if (m_faceOffset[3] > -1) {
1611 #pragma omp for nowait
1612 for (index_t k2=0; k2 < m_NE2; ++k2) {
1613 for (index_t k0=0; k0 < m_NE0; ++k0) {
1614 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1615 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1616 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1617 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1618 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1619 for (index_t i=0; i < numComp; ++i) {
1620 o[INDEX2(i,numComp,0)] = f_010[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_110[i]);
1621 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_111[i]);
1622 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_111[i]);
1623 o[INDEX2(i,numComp,3)] = f_010[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_110[i]);
1624 } /* end of component loop i */
1625 } /* end of k0 loop */
1626 } /* end of k2 loop */
1627 } /* end of face 3 */
1628 if (m_faceOffset[4] > -1) {
1629 #pragma omp for nowait
1630 for (index_t k1=0; k1 < m_NE1; ++k1) {
1631 for (index_t k0=0; k0 < m_NE0; ++k0) {
1632 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1633 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1634 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1635 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1636 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1637 for (index_t i=0; i < numComp; ++i) {
1638 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_100[i]);
1639 o[INDEX2(i,numComp,1)] = f_010[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_110[i]);
1640 o[INDEX2(i,numComp,2)] = f_010[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_110[i]);
1641 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_100[i]);
1642 } /* end of component loop i */
1643 } /* end of k0 loop */
1644 } /* end of k1 loop */
1645 } /* end of face 4 */
1646 if (m_faceOffset[5] > -1) {
1647 #pragma omp for nowait
1648 for (index_t k1=0; k1 < m_NE1; ++k1) {
1649 for (index_t k0=0; k0 < m_NE0; ++k0) {
1650 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1651 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1652 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1653 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1654 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1655 for (index_t i=0; i < numComp; ++i) {
1656 o[INDEX2(i,numComp,0)] = f_001[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_101[i]);
1657 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_111[i]);
1658 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_111[i]);
1659 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_101[i]);
1660 } /* end of component loop i */
1661 } /* end of k0 loop */
1662 } /* end of k1 loop */
1663 } /* end of face 5 */
1664 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1665 } // end of parallel section
1666 }
1667 }
1668
1669 //protected
1670 void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1671 {
1672 const dim_t numComp = in.getDataPointSize();
1673 out.requireWrite();
1674
1675 const index_t left = (m_offset0==0 ? 0 : 1);
1676 const index_t bottom = (m_offset1==0 ? 0 : 1);
1677 const index_t front = (m_offset2==0 ? 0 : 1);
1678 const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
1679 const index_t top = (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1 ? m_N1 : m_N1-1);
1680 const index_t back = (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1 ? m_N2 : m_N2-1);
1681 index_t n=0;
1682 for (index_t i=front; i<back; i++) {
1683 for (index_t j=bottom; j<top; j++) {
1684 for (index_t k=left; k<right; k++, n++) {
1685 const double* src=in.getSampleDataRO(k+j*m_N0+i*m_N0*m_N1);
1686 copy(src, src+numComp, out.getSampleDataRW(n));
1687 }
1688 }
1689 }
1690 }
1691
1692
1693 } // end of namespace ripley
1694

  ViewVC Help
Powered by ViewVC 1.1.26