/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3731 - (show annotations)
Fri Dec 9 01:22:44 2011 UTC (7 years, 4 months ago) by caltinay
File size: 93416 byte(s)
More hand optimization with noticeable runtime reduction for tests.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 if (dynamic_cast<const Brick*>(&other))
81 return this==&other;
82
83 return false;
84 }
85
86 void Brick::dump(const string& fileName) const
87 {
88 #if USE_SILO
89 string fn(fileName);
90 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91 fn+=".silo";
92 }
93
94 const int NUM_SILO_FILES = 1;
95 const char* blockDirFmt = "/block%04d";
96 int driver=DB_HDF5;
97 string siloPath;
98 DBfile* dbfile = NULL;
99
100 #ifdef ESYS_MPI
101 PMPIO_baton_t* baton = NULL;
102 #endif
103
104 if (m_mpiInfo->size > 1) {
105 #ifdef ESYS_MPI
106 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108 PMPIO_DefaultClose, (void*)&driver);
109 // try the fallback driver in case of error
110 if (!baton && driver != DB_PDB) {
111 driver = DB_PDB;
112 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114 PMPIO_DefaultClose, (void*)&driver);
115 }
116 if (baton) {
117 char str[64];
118 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119 siloPath = str;
120 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121 }
122 #endif
123 } else {
124 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125 getDescription().c_str(), driver);
126 // try the fallback driver in case of error
127 if (!dbfile && driver != DB_PDB) {
128 driver = DB_PDB;
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 }
132 }
133
134 if (!dbfile)
135 throw RipleyException("dump: Could not create Silo file");
136
137 /*
138 if (driver==DB_HDF5) {
139 // gzip level 1 already provides good compression with minimal
140 // performance penalty. Some tests showed that gzip levels >3 performed
141 // rather badly on escript data both in terms of time and space
142 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143 }
144 */
145
146 boost::scoped_ptr<double> x(new double[m_N0]);
147 boost::scoped_ptr<double> y(new double[m_N1]);
148 boost::scoped_ptr<double> z(new double[m_N2]);
149 double* coords[3] = { x.get(), y.get(), z.get() };
150 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151 pair<double,double> ydy = getFirstCoordAndSpacing(1);
152 pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 #pragma omp parallel
154 {
155 #pragma omp for
156 for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 coords[0][i0]=xdx.first+i0*xdx.second;
158 }
159 #pragma omp for
160 for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 coords[1][i1]=ydy.first+i1*ydy.second;
162 }
163 #pragma omp for
164 for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 coords[2][i2]=zdz.first+i2*zdz.second;
166 }
167 }
168 IndexVector dims = getNumNodesPerDim();
169 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 DB_COLLINEAR, NULL);
171
172 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173 NULL, 0, DB_INT, DB_NODECENT, NULL);
174
175 // write element ids
176 dims = getNumElementsPerDim();
177 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179
180 // rank 0 writes multimesh and multivar
181 if (m_mpiInfo->rank == 0) {
182 vector<string> tempstrings;
183 vector<char*> names;
184 for (dim_t i=0; i<m_mpiInfo->size; i++) {
185 stringstream path;
186 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187 tempstrings.push_back(path.str());
188 names.push_back((char*)tempstrings.back().c_str());
189 }
190 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191 DBSetDir(dbfile, "/");
192 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 types.assign(m_mpiInfo->size, DB_QUADVAR);
203 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204 &types[0], NULL);
205 tempstrings.clear();
206 names.clear();
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214 &types[0], NULL);
215 }
216
217 if (m_mpiInfo->size > 1) {
218 #ifdef ESYS_MPI
219 PMPIO_HandOffBaton(baton, dbfile);
220 PMPIO_Finish(baton);
221 #endif
222 } else {
223 DBClose(dbfile);
224 }
225
226 #else // USE_SILO
227 throw RipleyException("dump(): no Silo support");
228 #endif
229 }
230
231 const int* Brick::borrowSampleReferenceIDs(int fsType) const
232 {
233 switch (fsType) {
234 case Nodes:
235 return &m_nodeId[0];
236 case Elements:
237 return &m_elementId[0];
238 case FaceElements:
239 return &m_faceId[0];
240 default:
241 break;
242 }
243
244 stringstream msg;
245 msg << "borrowSampleReferenceIDs() not implemented for function space type "
246 << fsType;
247 throw RipleyException(msg.str());
248 }
249
250 bool Brick::ownSample(int fsCode, index_t id) const
251 {
252 #ifdef ESYS_MPI
253 if (fsCode == Nodes) {
254 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257 } else
258 throw RipleyException("ownSample() only implemented for Nodes");
259 #else
260 return true;
261 #endif
262 }
263
264 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265 {
266 escript::Data& in = *const_cast<escript::Data*>(&cIn);
267 const dim_t numComp = in.getDataPointSize();
268 const double h0 = m_l0/m_gNE0;
269 const double h1 = m_l1/m_gNE1;
270 const double h2 = m_l1/m_gNE2;
271 const double C0 = .044658198738520451079;
272 const double C1 = .16666666666666666667;
273 const double C2 = .21132486540518711775;
274 const double C3 = .25;
275 const double C4 = .5;
276 const double C5 = .62200846792814621559;
277 const double C6 = .78867513459481288225;
278
279 if (out.getFunctionSpace().getTypeCode() == Elements) {
280 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
281 #pragma omp parallel for
282 for (index_t k2=0; k2 < m_NE2; ++k2) {
283 for (index_t k1=0; k1 < m_NE1; ++k1) {
284 for (index_t k0=0; k0 < m_NE0; ++k0) {
285 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
286 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
287 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
288 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
289 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
290 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
291 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
292 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
293 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
294 for (index_t i=0; i < numComp; ++i) {
295 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
296 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
297 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
298 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
299 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
300 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
301 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
302 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
303 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
304 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
305 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
306 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
307 o[INDEX3(i,0,0,numComp,3)] = V0;
308 o[INDEX3(i,1,0,numComp,3)] = V4;
309 o[INDEX3(i,2,0,numComp,3)] = V8;
310 o[INDEX3(i,0,1,numComp,3)] = V0;
311 o[INDEX3(i,1,1,numComp,3)] = V5;
312 o[INDEX3(i,2,1,numComp,3)] = V9;
313 o[INDEX3(i,0,2,numComp,3)] = V1;
314 o[INDEX3(i,1,2,numComp,3)] = V4;
315 o[INDEX3(i,2,2,numComp,3)] = V10;
316 o[INDEX3(i,0,3,numComp,3)] = V1;
317 o[INDEX3(i,1,3,numComp,3)] = V5;
318 o[INDEX3(i,2,3,numComp,3)] = V11;
319 o[INDEX3(i,0,4,numComp,3)] = V2;
320 o[INDEX3(i,1,4,numComp,3)] = V6;
321 o[INDEX3(i,2,4,numComp,3)] = V8;
322 o[INDEX3(i,0,5,numComp,3)] = V2;
323 o[INDEX3(i,1,5,numComp,3)] = V7;
324 o[INDEX3(i,2,5,numComp,3)] = V9;
325 o[INDEX3(i,0,6,numComp,3)] = V3;
326 o[INDEX3(i,1,6,numComp,3)] = V6;
327 o[INDEX3(i,2,6,numComp,3)] = V10;
328 o[INDEX3(i,0,7,numComp,3)] = V3;
329 o[INDEX3(i,1,7,numComp,3)] = V7;
330 o[INDEX3(i,2,7,numComp,3)] = V11;
331 } /* end of component loop i */
332 } /* end of k0 loop */
333 } /* end of k1 loop */
334 } /* end of k2 loop */
335 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
336 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
337 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
338 #pragma omp parallel for
339 for (index_t k2=0; k2 < m_NE2; ++k2) {
340 for (index_t k1=0; k1 < m_NE1; ++k1) {
341 for (index_t k0=0; k0 < m_NE0; ++k0) {
342 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
343 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
344 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
345 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
346 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
347 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
348 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
349 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
350 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
351 for (index_t i=0; i < numComp; ++i) {
352 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
353 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
354 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
355 } /* end of component loop i */
356 } /* end of k0 loop */
357 } /* end of k1 loop */
358 } /* end of k2 loop */
359 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
360 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
361 /*** GENERATOR SNIP_GRAD_FACES TOP */
362 #pragma omp parallel
363 {
364 if (m_faceOffset[0] > -1) {
365 #pragma omp for nowait
366 for (index_t k2=0; k2 < m_NE2; ++k2) {
367 for (index_t k1=0; k1 < m_NE1; ++k1) {
368 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
369 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
370 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
371 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
372 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
373 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
374 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
375 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
376 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
377 for (index_t i=0; i < numComp; ++i) {
378 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
379 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
380 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
381 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
382 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
383 o[INDEX3(i,1,0,numComp,3)] = V0;
384 o[INDEX3(i,2,0,numComp,3)] = V2;
385 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
386 o[INDEX3(i,1,1,numComp,3)] = V0;
387 o[INDEX3(i,2,1,numComp,3)] = V3;
388 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
389 o[INDEX3(i,1,2,numComp,3)] = V1;
390 o[INDEX3(i,2,2,numComp,3)] = V2;
391 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
392 o[INDEX3(i,1,3,numComp,3)] = V1;
393 o[INDEX3(i,2,3,numComp,3)] = V3;
394 } /* end of component loop i */
395 } /* end of k1 loop */
396 } /* end of k2 loop */
397 } /* end of face 0 */
398 if (m_faceOffset[1] > -1) {
399 #pragma omp for nowait
400 for (index_t k2=0; k2 < m_NE2; ++k2) {
401 for (index_t k1=0; k1 < m_NE1; ++k1) {
402 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
403 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
404 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
405 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
406 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
407 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
408 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
409 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
410 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
411 for (index_t i=0; i < numComp; ++i) {
412 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
413 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
414 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
415 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
416 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
417 o[INDEX3(i,1,0,numComp,3)] = V0;
418 o[INDEX3(i,2,0,numComp,3)] = V2;
419 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
420 o[INDEX3(i,1,1,numComp,3)] = V0;
421 o[INDEX3(i,2,1,numComp,3)] = V3;
422 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
423 o[INDEX3(i,1,2,numComp,3)] = V1;
424 o[INDEX3(i,2,2,numComp,3)] = V2;
425 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
426 o[INDEX3(i,1,3,numComp,3)] = V1;
427 o[INDEX3(i,2,3,numComp,3)] = V3;
428 } /* end of component loop i */
429 } /* end of k1 loop */
430 } /* end of k2 loop */
431 } /* end of face 1 */
432 if (m_faceOffset[2] > -1) {
433 #pragma omp for nowait
434 for (index_t k2=0; k2 < m_NE2; ++k2) {
435 for (index_t k0=0; k0 < m_NE0; ++k0) {
436 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
437 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
438 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
439 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
440 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
441 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
442 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
443 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
444 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
445 for (index_t i=0; i < numComp; ++i) {
446 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
447 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
448 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
449 o[INDEX3(i,0,0,numComp,3)] = V0;
450 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
451 o[INDEX3(i,2,0,numComp,3)] = V1;
452 o[INDEX3(i,0,1,numComp,3)] = V0;
453 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
454 o[INDEX3(i,2,1,numComp,3)] = V2;
455 o[INDEX3(i,0,2,numComp,3)] = V0;
456 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
457 o[INDEX3(i,2,2,numComp,3)] = V1;
458 o[INDEX3(i,0,3,numComp,3)] = V0;
459 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
460 o[INDEX3(i,2,3,numComp,3)] = V2;
461 } /* end of component loop i */
462 } /* end of k0 loop */
463 } /* end of k2 loop */
464 } /* end of face 2 */
465 if (m_faceOffset[3] > -1) {
466 #pragma omp for nowait
467 for (index_t k2=0; k2 < m_NE2; ++k2) {
468 for (index_t k0=0; k0 < m_NE0; ++k0) {
469 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
470 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
471 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
472 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
473 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
474 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
475 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
476 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
477 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
478 for (index_t i=0; i < numComp; ++i) {
479 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
480 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
481 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
482 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
483 o[INDEX3(i,0,0,numComp,3)] = V0;
484 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
485 o[INDEX3(i,2,0,numComp,3)] = V2;
486 o[INDEX3(i,0,1,numComp,3)] = V0;
487 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
488 o[INDEX3(i,2,1,numComp,3)] = V3;
489 o[INDEX3(i,0,2,numComp,3)] = V1;
490 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
491 o[INDEX3(i,2,2,numComp,3)] = V2;
492 o[INDEX3(i,0,3,numComp,3)] = V1;
493 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
494 o[INDEX3(i,2,3,numComp,3)] = V3;
495 } /* end of component loop i */
496 } /* end of k0 loop */
497 } /* end of k2 loop */
498 } /* end of face 3 */
499 if (m_faceOffset[4] > -1) {
500 #pragma omp for nowait
501 for (index_t k1=0; k1 < m_NE1; ++k1) {
502 for (index_t k0=0; k0 < m_NE0; ++k0) {
503 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
504 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
505 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
506 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
507 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
508 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
509 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
510 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
511 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
512 for (index_t i=0; i < numComp; ++i) {
513 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
514 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
515 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
516 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
517 o[INDEX3(i,0,0,numComp,3)] = V0;
518 o[INDEX3(i,1,0,numComp,3)] = V2;
519 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
520 o[INDEX3(i,0,1,numComp,3)] = V0;
521 o[INDEX3(i,1,1,numComp,3)] = V3;
522 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
523 o[INDEX3(i,0,2,numComp,3)] = V1;
524 o[INDEX3(i,1,2,numComp,3)] = V2;
525 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
526 o[INDEX3(i,0,3,numComp,3)] = V1;
527 o[INDEX3(i,1,3,numComp,3)] = V3;
528 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
529 } /* end of component loop i */
530 } /* end of k0 loop */
531 } /* end of k1 loop */
532 } /* end of face 4 */
533 if (m_faceOffset[5] > -1) {
534 #pragma omp for nowait
535 for (index_t k1=0; k1 < m_NE1; ++k1) {
536 for (index_t k0=0; k0 < m_NE0; ++k0) {
537 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
538 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
539 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
540 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
541 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
542 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
543 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
544 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
545 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
546 for (index_t i=0; i < numComp; ++i) {
547 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
548 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
549 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
550 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
551 o[INDEX3(i,0,0,numComp,3)] = V0;
552 o[INDEX3(i,1,0,numComp,3)] = V2;
553 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
554 o[INDEX3(i,0,1,numComp,3)] = V0;
555 o[INDEX3(i,1,1,numComp,3)] = V3;
556 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
557 o[INDEX3(i,0,2,numComp,3)] = V1;
558 o[INDEX3(i,1,2,numComp,3)] = V2;
559 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
560 o[INDEX3(i,0,3,numComp,3)] = V1;
561 o[INDEX3(i,1,3,numComp,3)] = V3;
562 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
563 } /* end of component loop i */
564 } /* end of k0 loop */
565 } /* end of k1 loop */
566 } /* end of face 5 */
567 } // end of parallel section
568 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
569 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
570 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
571 #pragma omp parallel
572 {
573 if (m_faceOffset[0] > -1) {
574 #pragma omp for nowait
575 for (index_t k2=0; k2 < m_NE2; ++k2) {
576 for (index_t k1=0; k1 < m_NE1; ++k1) {
577 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
578 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
579 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
580 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
581 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
582 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
583 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
584 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
585 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
586 for (index_t i=0; i < numComp; ++i) {
587 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
588 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
589 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
590 } /* end of component loop i */
591 } /* end of k1 loop */
592 } /* end of k2 loop */
593 } /* end of face 0 */
594 if (m_faceOffset[1] > -1) {
595 #pragma omp for nowait
596 for (index_t k2=0; k2 < m_NE2; ++k2) {
597 for (index_t k1=0; k1 < m_NE1; ++k1) {
598 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
599 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
600 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
601 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
602 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
603 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
604 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
605 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
606 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
607 for (index_t i=0; i < numComp; ++i) {
608 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
609 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
610 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
611 } /* end of component loop i */
612 } /* end of k1 loop */
613 } /* end of k2 loop */
614 } /* end of face 1 */
615 if (m_faceOffset[2] > -1) {
616 #pragma omp for nowait
617 for (index_t k2=0; k2 < m_NE2; ++k2) {
618 for (index_t k0=0; k0 < m_NE0; ++k0) {
619 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
620 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
621 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
622 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
623 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
624 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
625 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
626 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
627 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
628 for (index_t i=0; i < numComp; ++i) {
629 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
630 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
631 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
632 } /* end of component loop i */
633 } /* end of k0 loop */
634 } /* end of k2 loop */
635 } /* end of face 2 */
636 if (m_faceOffset[3] > -1) {
637 #pragma omp for nowait
638 for (index_t k2=0; k2 < m_NE2; ++k2) {
639 for (index_t k0=0; k0 < m_NE0; ++k0) {
640 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
641 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
642 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
643 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
644 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
645 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
646 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
647 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
648 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
649 for (index_t i=0; i < numComp; ++i) {
650 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
651 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
652 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
653 } /* end of component loop i */
654 } /* end of k0 loop */
655 } /* end of k2 loop */
656 } /* end of face 3 */
657 if (m_faceOffset[4] > -1) {
658 #pragma omp for nowait
659 for (index_t k1=0; k1 < m_NE1; ++k1) {
660 for (index_t k0=0; k0 < m_NE0; ++k0) {
661 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
662 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
663 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
664 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
665 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
666 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
667 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
668 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
669 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
670 for (index_t i=0; i < numComp; ++i) {
671 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
672 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
673 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
674 } /* end of component loop i */
675 } /* end of k0 loop */
676 } /* end of k1 loop */
677 } /* end of face 4 */
678 if (m_faceOffset[5] > -1) {
679 #pragma omp for nowait
680 for (index_t k1=0; k1 < m_NE1; ++k1) {
681 for (index_t k0=0; k0 < m_NE0; ++k0) {
682 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
683 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
684 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
685 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
686 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
687 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
688 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
689 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
690 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
691 for (index_t i=0; i < numComp; ++i) {
692 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
693 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
694 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
695 } /* end of component loop i */
696 } /* end of k0 loop */
697 } /* end of k1 loop */
698 } /* end of face 5 */
699 } // end of parallel section
700 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
701 } else {
702 stringstream msg;
703 msg << "setToGradient() not implemented for "
704 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
705 throw RipleyException(msg.str());
706 }
707 }
708
709 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
710 {
711 escript::Data& in = *const_cast<escript::Data*>(&arg);
712 const dim_t numComp = in.getDataPointSize();
713 const double h0 = m_l0/m_gNE0;
714 const double h1 = m_l1/m_gNE1;
715 const double h2 = m_l2/m_gNE2;
716 if (arg.getFunctionSpace().getTypeCode() == Elements) {
717 const double w_0 = h0*h1*h2/8.;
718 #pragma omp parallel
719 {
720 vector<double> int_local(numComp, 0);
721 #pragma omp for nowait
722 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
723 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
724 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
725 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
726 for (index_t i=0; i < numComp; ++i) {
727 const register double f_0 = f[INDEX2(i,0,numComp)];
728 const register double f_1 = f[INDEX2(i,1,numComp)];
729 const register double f_2 = f[INDEX2(i,2,numComp)];
730 const register double f_3 = f[INDEX2(i,3,numComp)];
731 const register double f_4 = f[INDEX2(i,4,numComp)];
732 const register double f_5 = f[INDEX2(i,5,numComp)];
733 const register double f_6 = f[INDEX2(i,6,numComp)];
734 const register double f_7 = f[INDEX2(i,7,numComp)];
735 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
736 } /* end of component loop i */
737 } /* end of k0 loop */
738 } /* end of k1 loop */
739 } /* end of k2 loop */
740
741 #pragma omp critical
742 for (index_t i=0; i<numComp; i++)
743 integrals[i]+=int_local[i];
744 } // end of parallel section
745 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
746 const double w_0 = h0*h1*h2;
747 #pragma omp parallel
748 {
749 vector<double> int_local(numComp, 0);
750 #pragma omp for nowait
751 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
752 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
753 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
754 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
755 for (index_t i=0; i < numComp; ++i) {
756 int_local[i]+=f[i]*w_0;
757 } /* end of component loop i */
758 } /* end of k0 loop */
759 } /* end of k1 loop */
760 } /* end of k2 loop */
761
762 #pragma omp critical
763 for (index_t i=0; i<numComp; i++)
764 integrals[i]+=int_local[i];
765 } // end of parallel section
766 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
767 const double w_0 = h1*h2/4.;
768 const double w_1 = h0*h2/4.;
769 const double w_2 = h0*h1/4.;
770 #pragma omp parallel
771 {
772 vector<double> int_local(numComp, 0);
773 if (m_faceOffset[0] > -1) {
774 #pragma omp for nowait
775 for (index_t k2=0; k2 < m_NE2; ++k2) {
776 for (index_t k1=0; k1 < m_NE1; ++k1) {
777 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
778 for (index_t i=0; i < numComp; ++i) {
779 const register double f_0 = f[INDEX2(i,0,numComp)];
780 const register double f_1 = f[INDEX2(i,1,numComp)];
781 const register double f_2 = f[INDEX2(i,2,numComp)];
782 const register double f_3 = f[INDEX2(i,3,numComp)];
783 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
784 } /* end of component loop i */
785 } /* end of k1 loop */
786 } /* end of k2 loop */
787 }
788
789 if (m_faceOffset[1] > -1) {
790 #pragma omp for nowait
791 for (index_t k2=0; k2 < m_NE2; ++k2) {
792 for (index_t k1=0; k1 < m_NE1; ++k1) {
793 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
794 for (index_t i=0; i < numComp; ++i) {
795 const register double f_0 = f[INDEX2(i,0,numComp)];
796 const register double f_1 = f[INDEX2(i,1,numComp)];
797 const register double f_2 = f[INDEX2(i,2,numComp)];
798 const register double f_3 = f[INDEX2(i,3,numComp)];
799 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
800 } /* end of component loop i */
801 } /* end of k1 loop */
802 } /* end of k2 loop */
803 }
804
805 if (m_faceOffset[2] > -1) {
806 #pragma omp for nowait
807 for (index_t k2=0; k2 < m_NE2; ++k2) {
808 for (index_t k0=0; k0 < m_NE0; ++k0) {
809 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
810 for (index_t i=0; i < numComp; ++i) {
811 const register double f_0 = f[INDEX2(i,0,numComp)];
812 const register double f_1 = f[INDEX2(i,1,numComp)];
813 const register double f_2 = f[INDEX2(i,2,numComp)];
814 const register double f_3 = f[INDEX2(i,3,numComp)];
815 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
816 } /* end of component loop i */
817 } /* end of k1 loop */
818 } /* end of k2 loop */
819 }
820
821 if (m_faceOffset[3] > -1) {
822 #pragma omp for nowait
823 for (index_t k2=0; k2 < m_NE2; ++k2) {
824 for (index_t k0=0; k0 < m_NE0; ++k0) {
825 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
826 for (index_t i=0; i < numComp; ++i) {
827 const register double f_0 = f[INDEX2(i,0,numComp)];
828 const register double f_1 = f[INDEX2(i,1,numComp)];
829 const register double f_2 = f[INDEX2(i,2,numComp)];
830 const register double f_3 = f[INDEX2(i,3,numComp)];
831 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
832 } /* end of component loop i */
833 } /* end of k1 loop */
834 } /* end of k2 loop */
835 }
836
837 if (m_faceOffset[4] > -1) {
838 #pragma omp for nowait
839 for (index_t k1=0; k1 < m_NE1; ++k1) {
840 for (index_t k0=0; k0 < m_NE0; ++k0) {
841 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
842 for (index_t i=0; i < numComp; ++i) {
843 const register double f_0 = f[INDEX2(i,0,numComp)];
844 const register double f_1 = f[INDEX2(i,1,numComp)];
845 const register double f_2 = f[INDEX2(i,2,numComp)];
846 const register double f_3 = f[INDEX2(i,3,numComp)];
847 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
848 } /* end of component loop i */
849 } /* end of k1 loop */
850 } /* end of k2 loop */
851 }
852
853 if (m_faceOffset[5] > -1) {
854 #pragma omp for nowait
855 for (index_t k1=0; k1 < m_NE1; ++k1) {
856 for (index_t k0=0; k0 < m_NE0; ++k0) {
857 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
858 for (index_t i=0; i < numComp; ++i) {
859 const register double f_0 = f[INDEX2(i,0,numComp)];
860 const register double f_1 = f[INDEX2(i,1,numComp)];
861 const register double f_2 = f[INDEX2(i,2,numComp)];
862 const register double f_3 = f[INDEX2(i,3,numComp)];
863 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
864 } /* end of component loop i */
865 } /* end of k1 loop */
866 } /* end of k2 loop */
867 }
868
869 #pragma omp critical
870 for (index_t i=0; i<numComp; i++)
871 integrals[i]+=int_local[i];
872 } // end of parallel section
873
874 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
875 const double w_0 = h1*h2;
876 const double w_1 = h0*h2;
877 const double w_2 = h0*h1;
878 #pragma omp parallel
879 {
880 vector<double> int_local(numComp, 0);
881 if (m_faceOffset[0] > -1) {
882 #pragma omp for nowait
883 for (index_t k2=0; k2 < m_NE2; ++k2) {
884 for (index_t k1=0; k1 < m_NE1; ++k1) {
885 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
886 for (index_t i=0; i < numComp; ++i) {
887 int_local[i]+=f[i]*w_0;
888 } /* end of component loop i */
889 } /* end of k1 loop */
890 } /* end of k2 loop */
891 }
892
893 if (m_faceOffset[1] > -1) {
894 #pragma omp for nowait
895 for (index_t k2=0; k2 < m_NE2; ++k2) {
896 for (index_t k1=0; k1 < m_NE1; ++k1) {
897 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
898 for (index_t i=0; i < numComp; ++i) {
899 int_local[i]+=f[i]*w_0;
900 } /* end of component loop i */
901 } /* end of k1 loop */
902 } /* end of k2 loop */
903 }
904
905 if (m_faceOffset[2] > -1) {
906 #pragma omp for nowait
907 for (index_t k2=0; k2 < m_NE2; ++k2) {
908 for (index_t k0=0; k0 < m_NE0; ++k0) {
909 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
910 for (index_t i=0; i < numComp; ++i) {
911 int_local[i]+=f[i]*w_1;
912 } /* end of component loop i */
913 } /* end of k1 loop */
914 } /* end of k2 loop */
915 }
916
917 if (m_faceOffset[3] > -1) {
918 #pragma omp for nowait
919 for (index_t k2=0; k2 < m_NE2; ++k2) {
920 for (index_t k0=0; k0 < m_NE0; ++k0) {
921 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
922 for (index_t i=0; i < numComp; ++i) {
923 int_local[i]+=f[i]*w_1;
924 } /* end of component loop i */
925 } /* end of k1 loop */
926 } /* end of k2 loop */
927 }
928
929 if (m_faceOffset[4] > -1) {
930 #pragma omp for nowait
931 for (index_t k1=0; k1 < m_NE1; ++k1) {
932 for (index_t k0=0; k0 < m_NE0; ++k0) {
933 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
934 for (index_t i=0; i < numComp; ++i) {
935 int_local[i]+=f[i]*w_2;
936 } /* end of component loop i */
937 } /* end of k1 loop */
938 } /* end of k2 loop */
939 }
940
941 if (m_faceOffset[5] > -1) {
942 #pragma omp for nowait
943 for (index_t k1=0; k1 < m_NE1; ++k1) {
944 for (index_t k0=0; k0 < m_NE0; ++k0) {
945 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
946 for (index_t i=0; i < numComp; ++i) {
947 int_local[i]+=f[i]*w_2;
948 } /* end of component loop i */
949 } /* end of k1 loop */
950 } /* end of k2 loop */
951 }
952
953 #pragma omp critical
954 for (index_t i=0; i<numComp; i++)
955 integrals[i]+=int_local[i];
956 } // end of parallel section
957
958 } else {
959 stringstream msg;
960 msg << "setToIntegrals() not implemented for "
961 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
962 throw RipleyException(msg.str());
963 }
964 }
965
966 void Brick::setToNormal(escript::Data& out) const
967 {
968 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
969 #pragma omp parallel
970 {
971 if (m_faceOffset[0] > -1) {
972 #pragma omp for nowait
973 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
974 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
975 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
976 // set vector at four quadrature points
977 *o++ = -1.; *o++ = 0.; *o++ = 0.;
978 *o++ = -1.; *o++ = 0.; *o++ = 0.;
979 *o++ = -1.; *o++ = 0.; *o++ = 0.;
980 *o++ = -1.; *o++ = 0.; *o = 0.;
981 }
982 }
983 }
984
985 if (m_faceOffset[1] > -1) {
986 #pragma omp for nowait
987 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
988 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
989 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
990 // set vector at four quadrature points
991 *o++ = 1.; *o++ = 0.; *o++ = 0.;
992 *o++ = 1.; *o++ = 0.; *o++ = 0.;
993 *o++ = 1.; *o++ = 0.; *o++ = 0.;
994 *o++ = 1.; *o++ = 0.; *o = 0.;
995 }
996 }
997 }
998
999 if (m_faceOffset[2] > -1) {
1000 #pragma omp for nowait
1001 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1002 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1003 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1004 // set vector at four quadrature points
1005 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1006 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1007 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008 *o++ = 0.; *o++ = -1.; *o = 0.;
1009 }
1010 }
1011 }
1012
1013 if (m_faceOffset[3] > -1) {
1014 #pragma omp for nowait
1015 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1016 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1017 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1018 // set vector at four quadrature points
1019 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1020 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1021 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022 *o++ = 0.; *o++ = 1.; *o = 0.;
1023 }
1024 }
1025 }
1026
1027 if (m_faceOffset[4] > -1) {
1028 #pragma omp for nowait
1029 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1030 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1031 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1032 // set vector at four quadrature points
1033 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1034 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1035 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036 *o++ = 0.; *o++ = 0.; *o = -1.;
1037 }
1038 }
1039 }
1040
1041 if (m_faceOffset[5] > -1) {
1042 #pragma omp for nowait
1043 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1044 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1045 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1046 // set vector at four quadrature points
1047 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1048 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1049 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050 *o++ = 0.; *o++ = 0.; *o = 1.;
1051 }
1052 }
1053 }
1054 } // end of parallel section
1055 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1056 #pragma omp parallel
1057 {
1058 if (m_faceOffset[0] > -1) {
1059 #pragma omp for nowait
1060 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1061 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1062 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1063 *o++ = -1.;
1064 *o++ = 0.;
1065 *o = 0.;
1066 }
1067 }
1068 }
1069
1070 if (m_faceOffset[1] > -1) {
1071 #pragma omp for nowait
1072 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1073 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1074 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1075 *o++ = 1.;
1076 *o++ = 0.;
1077 *o = 0.;
1078 }
1079 }
1080 }
1081
1082 if (m_faceOffset[2] > -1) {
1083 #pragma omp for nowait
1084 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1085 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1086 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1087 *o++ = 0.;
1088 *o++ = -1.;
1089 *o = 0.;
1090 }
1091 }
1092 }
1093
1094 if (m_faceOffset[3] > -1) {
1095 #pragma omp for nowait
1096 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1097 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1098 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1099 *o++ = 0.;
1100 *o++ = 1.;
1101 *o = 0.;
1102 }
1103 }
1104 }
1105
1106 if (m_faceOffset[4] > -1) {
1107 #pragma omp for nowait
1108 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1109 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1110 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1111 *o++ = 0.;
1112 *o++ = 0.;
1113 *o = -1.;
1114 }
1115 }
1116 }
1117
1118 if (m_faceOffset[5] > -1) {
1119 #pragma omp for nowait
1120 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1121 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1122 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1123 *o++ = 0.;
1124 *o++ = 0.;
1125 *o = 1.;
1126 }
1127 }
1128 }
1129 } // end of parallel section
1130
1131 } else {
1132 stringstream msg;
1133 msg << "setToNormal() not implemented for "
1134 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1135 throw RipleyException(msg.str());
1136 }
1137 }
1138
1139 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1140 bool reducedColOrder) const
1141 {
1142 if (reducedRowOrder || reducedColOrder)
1143 throw RipleyException("getPattern() not implemented for reduced order");
1144
1145 throw RipleyException("getPattern() not implemented");
1146 }
1147
1148 void Brick::Print_Mesh_Info(const bool full) const
1149 {
1150 RipleyDomain::Print_Mesh_Info(full);
1151 if (full) {
1152 cout << " Id Coordinates" << endl;
1153 cout.precision(15);
1154 cout.setf(ios::scientific, ios::floatfield);
1155 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1156 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1157 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1158 for (index_t i=0; i < getNumNodes(); i++) {
1159 cout << " " << setw(5) << m_nodeId[i]
1160 << " " << xdx.first+(i%m_N0)*xdx.second
1161 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1162 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1163 }
1164 }
1165 }
1166
1167 IndexVector Brick::getNumNodesPerDim() const
1168 {
1169 IndexVector ret;
1170 ret.push_back(m_N0);
1171 ret.push_back(m_N1);
1172 ret.push_back(m_N2);
1173 return ret;
1174 }
1175
1176 IndexVector Brick::getNumElementsPerDim() const
1177 {
1178 IndexVector ret;
1179 ret.push_back(m_NE0);
1180 ret.push_back(m_NE1);
1181 ret.push_back(m_NE2);
1182 return ret;
1183 }
1184
1185 IndexVector Brick::getNumFacesPerBoundary() const
1186 {
1187 IndexVector ret(6, 0);
1188 //left
1189 if (m_offset0==0)
1190 ret[0]=m_NE1*m_NE2;
1191 //right
1192 if (m_mpiInfo->rank%m_NX==m_NX-1)
1193 ret[1]=m_NE1*m_NE2;
1194 //bottom
1195 if (m_offset1==0)
1196 ret[2]=m_NE0*m_NE2;
1197 //top
1198 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1199 ret[3]=m_NE0*m_NE2;
1200 //front
1201 if (m_offset2==0)
1202 ret[4]=m_NE0*m_NE1;
1203 //back
1204 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1205 ret[5]=m_NE0*m_NE1;
1206 return ret;
1207 }
1208
1209 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1210 {
1211 if (dim==0)
1212 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1213 else if (dim==1)
1214 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1215 else if (dim==2)
1216 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1217
1218 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1219 }
1220
1221
1222 //protected
1223 dim_t Brick::getNumFaceElements() const
1224 {
1225 dim_t n=0;
1226 //left
1227 if (m_offset0==0)
1228 n+=m_NE1*m_NE2;
1229 //right
1230 if (m_mpiInfo->rank%m_NX==m_NX-1)
1231 n+=m_NE1*m_NE2;
1232 //bottom
1233 if (m_offset1==0)
1234 n+=m_NE0*m_NE2;
1235 //top
1236 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1237 n+=m_NE0*m_NE2;
1238 //front
1239 if (m_offset2==0)
1240 n+=m_NE0*m_NE1;
1241 //back
1242 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1243 n+=m_NE0*m_NE1;
1244
1245 return n;
1246 }
1247
1248 //protected
1249 void Brick::assembleCoordinates(escript::Data& arg) const
1250 {
1251 escriptDataC x = arg.getDataC();
1252 int numDim = m_numDim;
1253 if (!isDataPointShapeEqual(&x, 1, &numDim))
1254 throw RipleyException("setToX: Invalid Data object shape");
1255 if (!numSamplesEqual(&x, 1, getNumNodes()))
1256 throw RipleyException("setToX: Illegal number of samples in Data object");
1257
1258 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1259 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1260 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1261 arg.requireWrite();
1262 #pragma omp parallel for
1263 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1264 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1265 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1266 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1267 point[0] = xdx.first+i0*xdx.second;
1268 point[1] = ydy.first+i1*ydy.second;
1269 point[2] = zdz.first+i2*zdz.second;
1270 }
1271 }
1272 }
1273 }
1274
1275 //private
1276 void Brick::populateSampleIds()
1277 {
1278 // identifiers are ordered from left to right, bottom to top, front to back
1279 // on each rank, except for the shared nodes which are owned by the rank
1280 // below / to the left / to the front of the current rank
1281
1282 // build node distribution vector first.
1283 // m_nodeDistribution[i] is the first node id on rank i, that is
1284 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1285 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1286 m_nodeDistribution[1]=getNumNodes();
1287 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1288 const index_t x = k%m_NX;
1289 const index_t y = k%(m_NX*m_NY)/m_NX;
1290 const index_t z = k/(m_NX*m_NY);
1291 index_t numNodes=getNumNodes();
1292 if (x>0)
1293 numNodes-=m_N1*m_N2;
1294 if (y>0)
1295 numNodes-=m_N0*m_N2;
1296 if (z>0)
1297 numNodes-=m_N0*m_N1;
1298 // if an edge was subtracted twice add it back
1299 if (x>0 && y>0)
1300 numNodes+=m_N2;
1301 if (x>0 && z>0)
1302 numNodes+=m_N1;
1303 if (y>0 && z>0)
1304 numNodes+=m_N0;
1305 // the corner node was removed 3x and added back 3x, so subtract it
1306 if (x>0 && y>0 && z>0)
1307 numNodes--;
1308 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1309 }
1310 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1311
1312 m_nodeId.resize(getNumNodes());
1313
1314 // the bottom, left and front planes are not owned by this rank so the
1315 // identifiers need to be computed accordingly
1316 const index_t left = (m_offset0==0 ? 0 : 1);
1317 const index_t bottom = (m_offset1==0 ? 0 : 1);
1318 const index_t front = (m_offset2==0 ? 0 : 1);
1319
1320 // case 1: all nodes on left plane are owned by rank on the left
1321 if (left>0) {
1322 const int neighbour=m_mpiInfo->rank-1;
1323 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1324 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1325 #pragma omp parallel for
1326 for (dim_t i2=front; i2<m_N2; i2++) {
1327 for (dim_t i1=bottom; i1<m_N1; i1++) {
1328 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1329 + (i1-bottom+1)*leftN0
1330 + (i2-front)*leftN0*leftN1 - 1;
1331 }
1332 }
1333 }
1334 // case 2: all nodes on bottom plane are owned by rank below
1335 if (bottom>0) {
1336 const int neighbour=m_mpiInfo->rank-m_NX;
1337 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1338 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1339 #pragma omp parallel for
1340 for (dim_t i2=front; i2<m_N2; i2++) {
1341 for (dim_t i0=left; i0<m_N0; i0++) {
1342 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1343 + bottomN0*(bottomN1-1)
1344 + (i2-front)*bottomN0*bottomN1 + i0-left;
1345 }
1346 }
1347 }
1348 // case 3: all nodes on front plane are owned by rank in front
1349 if (front>0) {
1350 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1351 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1352 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1353 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1354 #pragma omp parallel for
1355 for (dim_t i1=bottom; i1<m_N1; i1++) {
1356 for (dim_t i0=left; i0<m_N0; i0++) {
1357 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1358 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1359 }
1360 }
1361 }
1362 // case 4: nodes on front bottom edge are owned by the corresponding rank
1363 if (front>0 && bottom>0) {
1364 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1365 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1366 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1367 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1368 #pragma omp parallel for
1369 for (dim_t i0=left; i0<m_N0; i0++) {
1370 m_nodeId[i0]=m_nodeDistribution[neighbour]
1371 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1372 }
1373 }
1374 // case 5: nodes on left bottom edge are owned by the corresponding rank
1375 if (left>0 && bottom>0) {
1376 const int neighbour=m_mpiInfo->rank-m_NX-1;
1377 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1378 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1379 #pragma omp parallel for
1380 for (dim_t i2=front; i2<m_N2; i2++) {
1381 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1382 + (1+i2-front)*N0*N1-1;
1383 }
1384 }
1385 // case 6: nodes on left front edge are owned by the corresponding rank
1386 if (left>0 && front>0) {
1387 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1388 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1389 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1390 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1391 #pragma omp parallel for
1392 for (dim_t i1=bottom; i1<m_N1; i1++) {
1393 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1394 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1395 }
1396 }
1397 // case 7: bottom-left-front corner node owned by corresponding rank
1398 if (left>0 && bottom>0 && front>0) {
1399 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1400 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1401 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1402 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1403 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1404 }
1405
1406 // the rest of the id's are contiguous
1407 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1408 #pragma omp parallel for
1409 for (dim_t i2=front; i2<m_N2; i2++) {
1410 for (dim_t i1=bottom; i1<m_N1; i1++) {
1411 for (dim_t i0=left; i0<m_N0; i0++) {
1412 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1413 +(i1-bottom)*(m_N0-left)
1414 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1415 }
1416 }
1417 }
1418
1419 // elements
1420 m_elementId.resize(getNumElements());
1421 #pragma omp parallel for
1422 for (dim_t k=0; k<getNumElements(); k++) {
1423 m_elementId[k]=k;
1424 }
1425
1426 // face elements
1427 m_faceId.resize(getNumFaceElements());
1428 #pragma omp parallel for
1429 for (dim_t k=0; k<getNumFaceElements(); k++) {
1430 m_faceId[k]=k;
1431 }
1432
1433 // generate face offset vector and set face tags
1434 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1435 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1436 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1437 m_faceOffset.assign(facesPerEdge.size(), -1);
1438 m_faceTags.clear();
1439 index_t offset=0;
1440 for (size_t i=0; i<facesPerEdge.size(); i++) {
1441 if (facesPerEdge[i]>0) {
1442 m_faceOffset[i]=offset;
1443 offset+=facesPerEdge[i];
1444 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1445 }
1446 }
1447 setTagMap("left", LEFT);
1448 setTagMap("right", RIGHT);
1449 setTagMap("bottom", BOTTOM);
1450 setTagMap("top", TOP);
1451 setTagMap("front", FRONT);
1452 setTagMap("back", BACK);
1453 updateTagsInUse(FaceElements);
1454 }
1455
1456 //protected
1457 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1458 bool reduced) const
1459 {
1460 const dim_t numComp = in.getDataPointSize();
1461 if (reduced) {
1462 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1463 const double c0 = .125;
1464 #pragma omp parallel for
1465 for (index_t k2=0; k2 < m_NE2; ++k2) {
1466 for (index_t k1=0; k1 < m_NE1; ++k1) {
1467 for (index_t k0=0; k0 < m_NE0; ++k0) {
1468 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1469 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1470 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1471 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1472 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1473 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1474 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1475 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1476 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1477 for (index_t i=0; i < numComp; ++i) {
1478 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1479 } /* end of component loop i */
1480 } /* end of k0 loop */
1481 } /* end of k1 loop */
1482 } /* end of k2 loop */
1483 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1484 } else {
1485 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1486 const double c0 = .0094373878376559314545;
1487 const double c1 = .035220810900864519624;
1488 const double c2 = .13144585576580214704;
1489 const double c3 = .49056261216234406855;
1490 #pragma omp parallel for
1491 for (index_t k2=0; k2 < m_NE2; ++k2) {
1492 for (index_t k1=0; k1 < m_NE1; ++k1) {
1493 for (index_t k0=0; k0 < m_NE0; ++k0) {
1494 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1495 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1496 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1497 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1498 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1499 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1500 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1501 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1502 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1503 for (index_t i=0; i < numComp; ++i) {
1504 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1505 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1506 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1507 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1508 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1509 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1510 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1511 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1512 } /* end of component loop i */
1513 } /* end of k0 loop */
1514 } /* end of k1 loop */
1515 } /* end of k2 loop */
1516 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1517 }
1518 }
1519
1520 //protected
1521 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1522 bool reduced) const
1523 {
1524 const dim_t numComp = in.getDataPointSize();
1525 if (reduced) {
1526 const double c0 = .25;
1527 #pragma omp parallel
1528 {
1529 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1530 if (m_faceOffset[0] > -1) {
1531 #pragma omp for nowait
1532 for (index_t k2=0; k2 < m_NE2; ++k2) {
1533 for (index_t k1=0; k1 < m_NE1; ++k1) {
1534 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1535 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1536 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1537 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1538 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1539 for (index_t i=0; i < numComp; ++i) {
1540 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1541 } /* end of component loop i */
1542 } /* end of k1 loop */
1543 } /* end of k2 loop */
1544 } /* end of face 0 */
1545 if (m_faceOffset[1] > -1) {
1546 #pragma omp for nowait
1547 for (index_t k2=0; k2 < m_NE2; ++k2) {
1548 for (index_t k1=0; k1 < m_NE1; ++k1) {
1549 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1550 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1551 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1552 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1553 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1554 for (index_t i=0; i < numComp; ++i) {
1555 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1556 } /* end of component loop i */
1557 } /* end of k1 loop */
1558 } /* end of k2 loop */
1559 } /* end of face 1 */
1560 if (m_faceOffset[2] > -1) {
1561 #pragma omp for nowait
1562 for (index_t k2=0; k2 < m_NE2; ++k2) {
1563 for (index_t k0=0; k0 < m_NE0; ++k0) {
1564 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1565 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1566 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1567 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1568 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1569 for (index_t i=0; i < numComp; ++i) {
1570 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1571 } /* end of component loop i */
1572 } /* end of k0 loop */
1573 } /* end of k2 loop */
1574 } /* end of face 2 */
1575 if (m_faceOffset[3] > -1) {
1576 #pragma omp for nowait
1577 for (index_t k2=0; k2 < m_NE2; ++k2) {
1578 for (index_t k0=0; k0 < m_NE0; ++k0) {
1579 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1580 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1581 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1582 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1583 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1584 for (index_t i=0; i < numComp; ++i) {
1585 o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1586 } /* end of component loop i */
1587 } /* end of k0 loop */
1588 } /* end of k2 loop */
1589 } /* end of face 3 */
1590 if (m_faceOffset[4] > -1) {
1591 #pragma omp for nowait
1592 for (index_t k1=0; k1 < m_NE1; ++k1) {
1593 for (index_t k0=0; k0 < m_NE0; ++k0) {
1594 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1595 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1596 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1597 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1598 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1599 for (index_t i=0; i < numComp; ++i) {
1600 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1601 } /* end of component loop i */
1602 } /* end of k0 loop */
1603 } /* end of k1 loop */
1604 } /* end of face 4 */
1605 if (m_faceOffset[5] > -1) {
1606 #pragma omp for nowait
1607 for (index_t k1=0; k1 < m_NE1; ++k1) {
1608 for (index_t k0=0; k0 < m_NE0; ++k0) {
1609 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1610 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1611 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1612 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1613 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1614 for (index_t i=0; i < numComp; ++i) {
1615 o[INDEX2(i,numComp,0)] = c0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1616 } /* end of component loop i */
1617 } /* end of k0 loop */
1618 } /* end of k1 loop */
1619 } /* end of face 5 */
1620 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1621 } // end of parallel section
1622 } else {
1623 const double c0 = 0.044658198738520451079;
1624 const double c1 = 0.16666666666666666667;
1625 const double c2 = 0.62200846792814621559;
1626 #pragma omp parallel
1627 {
1628 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1629 if (m_faceOffset[0] > -1) {
1630 #pragma omp for nowait
1631 for (index_t k2=0; k2 < m_NE2; ++k2) {
1632 for (index_t k1=0; k1 < m_NE1; ++k1) {
1633 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1634 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1635 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1636 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1637 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1638 for (index_t i=0; i < numComp; ++i) {
1639 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_011[i]*c0 + c1*(f_001[i] + f_010[i]);
1640 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_010[i]*c2 + c1*(f_000[i] + f_011[i]);
1641 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_010[i]*c0 + c1*(f_000[i] + f_011[i]);
1642 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_011[i]*c2 + c1*(f_001[i] + f_010[i]);
1643 } /* end of component loop i */
1644 } /* end of k1 loop */
1645 } /* end of k2 loop */
1646 } /* end of face 0 */
1647 if (m_faceOffset[1] > -1) {
1648 #pragma omp for nowait
1649 for (index_t k2=0; k2 < m_NE2; ++k2) {
1650 for (index_t k1=0; k1 < m_NE1; ++k1) {
1651 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1652 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1653 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1654 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1655 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1656 for (index_t i=0; i < numComp; ++i) {
1657 o[INDEX2(i,numComp,0)] = f_100[i]*c2 + f_111[i]*c0 + c1*(f_101[i] + f_110[i]);
1658 o[INDEX2(i,numComp,1)] = f_101[i]*c0 + f_110[i]*c2 + c1*(f_100[i] + f_111[i]);
1659 o[INDEX2(i,numComp,2)] = f_101[i]*c2 + f_110[i]*c0 + c1*(f_100[i] + f_111[i]);
1660 o[INDEX2(i,numComp,3)] = f_100[i]*c0 + f_111[i]*c2 + c1*(f_101[i] + f_110[i]);
1661 } /* end of component loop i */
1662 } /* end of k1 loop */
1663 } /* end of k2 loop */
1664 } /* end of face 1 */
1665 if (m_faceOffset[2] > -1) {
1666 #pragma omp for nowait
1667 for (index_t k2=0; k2 < m_NE2; ++k2) {
1668 for (index_t k0=0; k0 < m_NE0; ++k0) {
1669 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1670 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1671 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1672 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1673 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1674 for (index_t i=0; i < numComp; ++i) {
1675 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_100[i]);
1676 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_101[i]);
1677 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_101[i]);
1678 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_100[i]);
1679 } /* end of component loop i */
1680 } /* end of k0 loop */
1681 } /* end of k2 loop */
1682 } /* end of face 2 */
1683 if (m_faceOffset[3] > -1) {
1684 #pragma omp for nowait
1685 for (index_t k2=0; k2 < m_NE2; ++k2) {
1686 for (index_t k0=0; k0 < m_NE0; ++k0) {
1687 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1688 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1689 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1690 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1691 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1692 for (index_t i=0; i < numComp; ++i) {
1693 o[INDEX2(i,numComp,0)] = f_010[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_110[i]);
1694 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_111[i]);
1695 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_111[i]);
1696 o[INDEX2(i,numComp,3)] = f_010[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_110[i]);
1697 } /* end of component loop i */
1698 } /* end of k0 loop */
1699 } /* end of k2 loop */
1700 } /* end of face 3 */
1701 if (m_faceOffset[4] > -1) {
1702 #pragma omp for nowait
1703 for (index_t k1=0; k1 < m_NE1; ++k1) {
1704 for (index_t k0=0; k0 < m_NE0; ++k0) {
1705 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1706 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1707 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1708 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1709 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1710 for (index_t i=0; i < numComp; ++i) {
1711 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_100[i]);
1712 o[INDEX2(i,numComp,1)] = f_010[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_110[i]);
1713 o[INDEX2(i,numComp,2)] = f_010[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_110[i]);
1714 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_100[i]);
1715 } /* end of component loop i */
1716 } /* end of k0 loop */
1717 } /* end of k1 loop */
1718 } /* end of face 4 */
1719 if (m_faceOffset[5] > -1) {
1720 #pragma omp for nowait
1721 for (index_t k1=0; k1 < m_NE1; ++k1) {
1722 for (index_t k0=0; k0 < m_NE0; ++k0) {
1723 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1724 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1725 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1726 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1727 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1728 for (index_t i=0; i < numComp; ++i) {
1729 o[INDEX2(i,numComp,0)] = f_001[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_101[i]);
1730 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_111[i]);
1731 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_111[i]);
1732 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_101[i]);
1733 } /* end of component loop i */
1734 } /* end of k0 loop */
1735 } /* end of k1 loop */
1736 } /* end of face 5 */
1737 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1738 } // end of parallel section
1739 }
1740 }
1741
1742 } // end of namespace ripley
1743

  ViewVC Help
Powered by ViewVC 1.1.26