/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3750 - (show annotations)
Fri Dec 23 01:20:34 2011 UTC (7 years, 2 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 94135 byte(s)
Checkpoint - reinstated DOF FS and tried to fix couple blocks...

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 const Brick* o=dynamic_cast<const Brick*>(&other);
81 if (o) {
82 return (RipleyDomain::operator==(other) &&
83 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
84 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
85 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
86 }
87
88 return false;
89 }
90
91 void Brick::dump(const string& fileName) const
92 {
93 #if USE_SILO
94 string fn(fileName);
95 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
96 fn+=".silo";
97 }
98
99 const int NUM_SILO_FILES = 1;
100 const char* blockDirFmt = "/block%04d";
101 int driver=DB_HDF5;
102 string siloPath;
103 DBfile* dbfile = NULL;
104
105 #ifdef ESYS_MPI
106 PMPIO_baton_t* baton = NULL;
107 #endif
108
109 if (m_mpiInfo->size > 1) {
110 #ifdef ESYS_MPI
111 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
112 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
113 PMPIO_DefaultClose, (void*)&driver);
114 // try the fallback driver in case of error
115 if (!baton && driver != DB_PDB) {
116 driver = DB_PDB;
117 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
118 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
119 PMPIO_DefaultClose, (void*)&driver);
120 }
121 if (baton) {
122 char str[64];
123 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
124 siloPath = str;
125 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
126 }
127 #endif
128 } else {
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 // try the fallback driver in case of error
132 if (!dbfile && driver != DB_PDB) {
133 driver = DB_PDB;
134 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
135 getDescription().c_str(), driver);
136 }
137 }
138
139 if (!dbfile)
140 throw RipleyException("dump: Could not create Silo file");
141
142 /*
143 if (driver==DB_HDF5) {
144 // gzip level 1 already provides good compression with minimal
145 // performance penalty. Some tests showed that gzip levels >3 performed
146 // rather badly on escript data both in terms of time and space
147 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
148 }
149 */
150
151 boost::scoped_ptr<double> x(new double[m_N0]);
152 boost::scoped_ptr<double> y(new double[m_N1]);
153 boost::scoped_ptr<double> z(new double[m_N2]);
154 double* coords[3] = { x.get(), y.get(), z.get() };
155 pair<double,double> xdx = getFirstCoordAndSpacing(0);
156 pair<double,double> ydy = getFirstCoordAndSpacing(1);
157 pair<double,double> zdz = getFirstCoordAndSpacing(2);
158 #pragma omp parallel
159 {
160 #pragma omp for
161 for (dim_t i0 = 0; i0 < m_N0; i0++) {
162 coords[0][i0]=xdx.first+i0*xdx.second;
163 }
164 #pragma omp for
165 for (dim_t i1 = 0; i1 < m_N1; i1++) {
166 coords[1][i1]=ydy.first+i1*ydy.second;
167 }
168 #pragma omp for
169 for (dim_t i2 = 0; i2 < m_N2; i2++) {
170 coords[2][i2]=zdz.first+i2*zdz.second;
171 }
172 }
173 IndexVector dims = getNumNodesPerDim();
174 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
175 DB_COLLINEAR, NULL);
176
177 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
178 NULL, 0, DB_INT, DB_NODECENT, NULL);
179
180 // write element ids
181 dims = getNumElementsPerDim();
182 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
183 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
184
185 // rank 0 writes multimesh and multivar
186 if (m_mpiInfo->rank == 0) {
187 vector<string> tempstrings;
188 vector<char*> names;
189 for (dim_t i=0; i<m_mpiInfo->size; i++) {
190 stringstream path;
191 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
192 tempstrings.push_back(path.str());
193 names.push_back((char*)tempstrings.back().c_str());
194 }
195 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
196 DBSetDir(dbfile, "/");
197 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
198 &types[0], NULL);
199 tempstrings.clear();
200 names.clear();
201 for (dim_t i=0; i<m_mpiInfo->size; i++) {
202 stringstream path;
203 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
204 tempstrings.push_back(path.str());
205 names.push_back((char*)tempstrings.back().c_str());
206 }
207 types.assign(m_mpiInfo->size, DB_QUADVAR);
208 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
209 &types[0], NULL);
210 tempstrings.clear();
211 names.clear();
212 for (dim_t i=0; i<m_mpiInfo->size; i++) {
213 stringstream path;
214 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
215 tempstrings.push_back(path.str());
216 names.push_back((char*)tempstrings.back().c_str());
217 }
218 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
219 &types[0], NULL);
220 }
221
222 if (m_mpiInfo->size > 1) {
223 #ifdef ESYS_MPI
224 PMPIO_HandOffBaton(baton, dbfile);
225 PMPIO_Finish(baton);
226 #endif
227 } else {
228 DBClose(dbfile);
229 }
230
231 #else // USE_SILO
232 throw RipleyException("dump(): no Silo support");
233 #endif
234 }
235
236 const int* Brick::borrowSampleReferenceIDs(int fsType) const
237 {
238 switch (fsType) {
239 case Nodes:
240 case ReducedNodes: //FIXME: reduced
241 case DegreesOfFreedom: //FIXME
242 case ReducedDegreesOfFreedom: //FIXME
243 return &m_nodeId[0];
244 case Elements:
245 case ReducedElements:
246 return &m_elementId[0];
247 case ReducedFaceElements:
248 return &m_faceId[0];
249 default:
250 break;
251 }
252
253 stringstream msg;
254 msg << "borrowSampleReferenceIDs() not implemented for function space type "
255 << fsType;
256 throw RipleyException(msg.str());
257 }
258
259 bool Brick::ownSample(int fsCode, index_t id) const
260 {
261 #ifdef ESYS_MPI
262 if (fsCode == Nodes) {
263 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
264 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
265 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
266 } else
267 throw RipleyException("ownSample() only implemented for Nodes");
268 #else
269 return true;
270 #endif
271 }
272
273 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
274 {
275 escript::Data& in = *const_cast<escript::Data*>(&cIn);
276 const dim_t numComp = in.getDataPointSize();
277 const double h0 = m_l0/m_gNE0;
278 const double h1 = m_l1/m_gNE1;
279 const double h2 = m_l1/m_gNE2;
280 const double C0 = .044658198738520451079;
281 const double C1 = .16666666666666666667;
282 const double C2 = .21132486540518711775;
283 const double C3 = .25;
284 const double C4 = .5;
285 const double C5 = .62200846792814621559;
286 const double C6 = .78867513459481288225;
287
288 if (out.getFunctionSpace().getTypeCode() == Elements) {
289 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
290 #pragma omp parallel for
291 for (index_t k2=0; k2 < m_NE2; ++k2) {
292 for (index_t k1=0; k1 < m_NE1; ++k1) {
293 for (index_t k0=0; k0 < m_NE0; ++k0) {
294 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
295 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
296 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
297 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
298 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
299 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
300 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
301 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
302 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
303 for (index_t i=0; i < numComp; ++i) {
304 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
305 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
306 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
307 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
308 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
309 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
310 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
311 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
312 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
313 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
314 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
315 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
316 o[INDEX3(i,0,0,numComp,3)] = V0;
317 o[INDEX3(i,1,0,numComp,3)] = V4;
318 o[INDEX3(i,2,0,numComp,3)] = V8;
319 o[INDEX3(i,0,1,numComp,3)] = V0;
320 o[INDEX3(i,1,1,numComp,3)] = V5;
321 o[INDEX3(i,2,1,numComp,3)] = V9;
322 o[INDEX3(i,0,2,numComp,3)] = V1;
323 o[INDEX3(i,1,2,numComp,3)] = V4;
324 o[INDEX3(i,2,2,numComp,3)] = V10;
325 o[INDEX3(i,0,3,numComp,3)] = V1;
326 o[INDEX3(i,1,3,numComp,3)] = V5;
327 o[INDEX3(i,2,3,numComp,3)] = V11;
328 o[INDEX3(i,0,4,numComp,3)] = V2;
329 o[INDEX3(i,1,4,numComp,3)] = V6;
330 o[INDEX3(i,2,4,numComp,3)] = V8;
331 o[INDEX3(i,0,5,numComp,3)] = V2;
332 o[INDEX3(i,1,5,numComp,3)] = V7;
333 o[INDEX3(i,2,5,numComp,3)] = V9;
334 o[INDEX3(i,0,6,numComp,3)] = V3;
335 o[INDEX3(i,1,6,numComp,3)] = V6;
336 o[INDEX3(i,2,6,numComp,3)] = V10;
337 o[INDEX3(i,0,7,numComp,3)] = V3;
338 o[INDEX3(i,1,7,numComp,3)] = V7;
339 o[INDEX3(i,2,7,numComp,3)] = V11;
340 } /* end of component loop i */
341 } /* end of k0 loop */
342 } /* end of k1 loop */
343 } /* end of k2 loop */
344 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
345 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
346 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
347 #pragma omp parallel for
348 for (index_t k2=0; k2 < m_NE2; ++k2) {
349 for (index_t k1=0; k1 < m_NE1; ++k1) {
350 for (index_t k0=0; k0 < m_NE0; ++k0) {
351 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
352 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
353 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
354 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
355 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
356 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
357 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
358 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
359 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
360 for (index_t i=0; i < numComp; ++i) {
361 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
362 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
363 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
364 } /* end of component loop i */
365 } /* end of k0 loop */
366 } /* end of k1 loop */
367 } /* end of k2 loop */
368 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
369 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
370 /*** GENERATOR SNIP_GRAD_FACES TOP */
371 #pragma omp parallel
372 {
373 if (m_faceOffset[0] > -1) {
374 #pragma omp for nowait
375 for (index_t k2=0; k2 < m_NE2; ++k2) {
376 for (index_t k1=0; k1 < m_NE1; ++k1) {
377 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
378 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
379 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
380 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
381 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
382 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
383 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
384 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
385 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
386 for (index_t i=0; i < numComp; ++i) {
387 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
388 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
389 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
390 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
391 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
392 o[INDEX3(i,1,0,numComp,3)] = V0;
393 o[INDEX3(i,2,0,numComp,3)] = V2;
394 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
395 o[INDEX3(i,1,1,numComp,3)] = V0;
396 o[INDEX3(i,2,1,numComp,3)] = V3;
397 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
398 o[INDEX3(i,1,2,numComp,3)] = V1;
399 o[INDEX3(i,2,2,numComp,3)] = V2;
400 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
401 o[INDEX3(i,1,3,numComp,3)] = V1;
402 o[INDEX3(i,2,3,numComp,3)] = V3;
403 } /* end of component loop i */
404 } /* end of k1 loop */
405 } /* end of k2 loop */
406 } /* end of face 0 */
407 if (m_faceOffset[1] > -1) {
408 #pragma omp for nowait
409 for (index_t k2=0; k2 < m_NE2; ++k2) {
410 for (index_t k1=0; k1 < m_NE1; ++k1) {
411 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
412 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
413 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
414 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
415 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
416 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
417 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
418 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
419 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
420 for (index_t i=0; i < numComp; ++i) {
421 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
422 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
423 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
424 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
425 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
426 o[INDEX3(i,1,0,numComp,3)] = V0;
427 o[INDEX3(i,2,0,numComp,3)] = V2;
428 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429 o[INDEX3(i,1,1,numComp,3)] = V0;
430 o[INDEX3(i,2,1,numComp,3)] = V3;
431 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
432 o[INDEX3(i,1,2,numComp,3)] = V1;
433 o[INDEX3(i,2,2,numComp,3)] = V2;
434 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
435 o[INDEX3(i,1,3,numComp,3)] = V1;
436 o[INDEX3(i,2,3,numComp,3)] = V3;
437 } /* end of component loop i */
438 } /* end of k1 loop */
439 } /* end of k2 loop */
440 } /* end of face 1 */
441 if (m_faceOffset[2] > -1) {
442 #pragma omp for nowait
443 for (index_t k2=0; k2 < m_NE2; ++k2) {
444 for (index_t k0=0; k0 < m_NE0; ++k0) {
445 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
446 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
447 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
448 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
449 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
450 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
451 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
452 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
453 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
454 for (index_t i=0; i < numComp; ++i) {
455 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
456 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
457 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
458 o[INDEX3(i,0,0,numComp,3)] = V0;
459 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
460 o[INDEX3(i,2,0,numComp,3)] = V1;
461 o[INDEX3(i,0,1,numComp,3)] = V0;
462 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
463 o[INDEX3(i,2,1,numComp,3)] = V2;
464 o[INDEX3(i,0,2,numComp,3)] = V0;
465 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
466 o[INDEX3(i,2,2,numComp,3)] = V1;
467 o[INDEX3(i,0,3,numComp,3)] = V0;
468 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
469 o[INDEX3(i,2,3,numComp,3)] = V2;
470 } /* end of component loop i */
471 } /* end of k0 loop */
472 } /* end of k2 loop */
473 } /* end of face 2 */
474 if (m_faceOffset[3] > -1) {
475 #pragma omp for nowait
476 for (index_t k2=0; k2 < m_NE2; ++k2) {
477 for (index_t k0=0; k0 < m_NE0; ++k0) {
478 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
479 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
480 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
481 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
482 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
483 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
484 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
485 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
486 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
487 for (index_t i=0; i < numComp; ++i) {
488 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
489 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
490 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
491 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
492 o[INDEX3(i,0,0,numComp,3)] = V0;
493 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
494 o[INDEX3(i,2,0,numComp,3)] = V2;
495 o[INDEX3(i,0,1,numComp,3)] = V0;
496 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497 o[INDEX3(i,2,1,numComp,3)] = V3;
498 o[INDEX3(i,0,2,numComp,3)] = V1;
499 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
500 o[INDEX3(i,2,2,numComp,3)] = V2;
501 o[INDEX3(i,0,3,numComp,3)] = V1;
502 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
503 o[INDEX3(i,2,3,numComp,3)] = V3;
504 } /* end of component loop i */
505 } /* end of k0 loop */
506 } /* end of k2 loop */
507 } /* end of face 3 */
508 if (m_faceOffset[4] > -1) {
509 #pragma omp for nowait
510 for (index_t k1=0; k1 < m_NE1; ++k1) {
511 for (index_t k0=0; k0 < m_NE0; ++k0) {
512 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
513 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
514 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
515 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
516 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
517 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
518 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
519 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
520 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
521 for (index_t i=0; i < numComp; ++i) {
522 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
523 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
524 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
525 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
526 o[INDEX3(i,0,0,numComp,3)] = V0;
527 o[INDEX3(i,1,0,numComp,3)] = V2;
528 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
529 o[INDEX3(i,0,1,numComp,3)] = V0;
530 o[INDEX3(i,1,1,numComp,3)] = V3;
531 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
532 o[INDEX3(i,0,2,numComp,3)] = V1;
533 o[INDEX3(i,1,2,numComp,3)] = V2;
534 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
535 o[INDEX3(i,0,3,numComp,3)] = V1;
536 o[INDEX3(i,1,3,numComp,3)] = V3;
537 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
538 } /* end of component loop i */
539 } /* end of k0 loop */
540 } /* end of k1 loop */
541 } /* end of face 4 */
542 if (m_faceOffset[5] > -1) {
543 #pragma omp for nowait
544 for (index_t k1=0; k1 < m_NE1; ++k1) {
545 for (index_t k0=0; k0 < m_NE0; ++k0) {
546 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
547 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
548 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
549 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
550 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
551 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
552 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
553 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
554 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
555 for (index_t i=0; i < numComp; ++i) {
556 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
557 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
558 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
559 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
560 o[INDEX3(i,0,0,numComp,3)] = V0;
561 o[INDEX3(i,1,0,numComp,3)] = V2;
562 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
563 o[INDEX3(i,0,1,numComp,3)] = V0;
564 o[INDEX3(i,1,1,numComp,3)] = V3;
565 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566 o[INDEX3(i,0,2,numComp,3)] = V1;
567 o[INDEX3(i,1,2,numComp,3)] = V2;
568 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
569 o[INDEX3(i,0,3,numComp,3)] = V1;
570 o[INDEX3(i,1,3,numComp,3)] = V3;
571 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
572 } /* end of component loop i */
573 } /* end of k0 loop */
574 } /* end of k1 loop */
575 } /* end of face 5 */
576 } // end of parallel section
577 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
578 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
579 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
580 #pragma omp parallel
581 {
582 if (m_faceOffset[0] > -1) {
583 #pragma omp for nowait
584 for (index_t k2=0; k2 < m_NE2; ++k2) {
585 for (index_t k1=0; k1 < m_NE1; ++k1) {
586 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
587 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
588 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
589 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
590 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
591 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
592 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
593 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
594 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
595 for (index_t i=0; i < numComp; ++i) {
596 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
597 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
598 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
599 } /* end of component loop i */
600 } /* end of k1 loop */
601 } /* end of k2 loop */
602 } /* end of face 0 */
603 if (m_faceOffset[1] > -1) {
604 #pragma omp for nowait
605 for (index_t k2=0; k2 < m_NE2; ++k2) {
606 for (index_t k1=0; k1 < m_NE1; ++k1) {
607 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
608 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
609 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
610 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
611 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
612 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
613 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
614 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
615 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
616 for (index_t i=0; i < numComp; ++i) {
617 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
618 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
619 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
620 } /* end of component loop i */
621 } /* end of k1 loop */
622 } /* end of k2 loop */
623 } /* end of face 1 */
624 if (m_faceOffset[2] > -1) {
625 #pragma omp for nowait
626 for (index_t k2=0; k2 < m_NE2; ++k2) {
627 for (index_t k0=0; k0 < m_NE0; ++k0) {
628 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
629 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
630 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
631 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
632 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
633 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
634 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
635 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
636 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
637 for (index_t i=0; i < numComp; ++i) {
638 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
639 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
640 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
641 } /* end of component loop i */
642 } /* end of k0 loop */
643 } /* end of k2 loop */
644 } /* end of face 2 */
645 if (m_faceOffset[3] > -1) {
646 #pragma omp for nowait
647 for (index_t k2=0; k2 < m_NE2; ++k2) {
648 for (index_t k0=0; k0 < m_NE0; ++k0) {
649 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
650 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
651 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
652 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
653 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
654 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
655 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
656 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
657 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
658 for (index_t i=0; i < numComp; ++i) {
659 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
660 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
661 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
662 } /* end of component loop i */
663 } /* end of k0 loop */
664 } /* end of k2 loop */
665 } /* end of face 3 */
666 if (m_faceOffset[4] > -1) {
667 #pragma omp for nowait
668 for (index_t k1=0; k1 < m_NE1; ++k1) {
669 for (index_t k0=0; k0 < m_NE0; ++k0) {
670 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
671 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
672 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
673 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
674 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
675 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
676 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
677 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
678 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
679 for (index_t i=0; i < numComp; ++i) {
680 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
681 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
682 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
683 } /* end of component loop i */
684 } /* end of k0 loop */
685 } /* end of k1 loop */
686 } /* end of face 4 */
687 if (m_faceOffset[5] > -1) {
688 #pragma omp for nowait
689 for (index_t k1=0; k1 < m_NE1; ++k1) {
690 for (index_t k0=0; k0 < m_NE0; ++k0) {
691 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
692 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
693 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
694 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
695 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
696 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
697 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
698 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
699 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
700 for (index_t i=0; i < numComp; ++i) {
701 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
702 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
703 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
704 } /* end of component loop i */
705 } /* end of k0 loop */
706 } /* end of k1 loop */
707 } /* end of face 5 */
708 } // end of parallel section
709 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
710 } else {
711 stringstream msg;
712 msg << "setToGradient() not implemented for "
713 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
714 throw RipleyException(msg.str());
715 }
716 }
717
718 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
719 {
720 escript::Data& in = *const_cast<escript::Data*>(&arg);
721 const dim_t numComp = in.getDataPointSize();
722 const double h0 = m_l0/m_gNE0;
723 const double h1 = m_l1/m_gNE1;
724 const double h2 = m_l2/m_gNE2;
725 if (arg.getFunctionSpace().getTypeCode() == Elements) {
726 const double w_0 = h0*h1*h2/8.;
727 #pragma omp parallel
728 {
729 vector<double> int_local(numComp, 0);
730 #pragma omp for nowait
731 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
732 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
733 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
734 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
735 for (index_t i=0; i < numComp; ++i) {
736 const register double f_0 = f[INDEX2(i,0,numComp)];
737 const register double f_1 = f[INDEX2(i,1,numComp)];
738 const register double f_2 = f[INDEX2(i,2,numComp)];
739 const register double f_3 = f[INDEX2(i,3,numComp)];
740 const register double f_4 = f[INDEX2(i,4,numComp)];
741 const register double f_5 = f[INDEX2(i,5,numComp)];
742 const register double f_6 = f[INDEX2(i,6,numComp)];
743 const register double f_7 = f[INDEX2(i,7,numComp)];
744 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
745 } /* end of component loop i */
746 } /* end of k0 loop */
747 } /* end of k1 loop */
748 } /* end of k2 loop */
749
750 #pragma omp critical
751 for (index_t i=0; i<numComp; i++)
752 integrals[i]+=int_local[i];
753 } // end of parallel section
754 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
755 const double w_0 = h0*h1*h2;
756 #pragma omp parallel
757 {
758 vector<double> int_local(numComp, 0);
759 #pragma omp for nowait
760 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
761 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
762 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
763 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
764 for (index_t i=0; i < numComp; ++i) {
765 int_local[i]+=f[i]*w_0;
766 } /* end of component loop i */
767 } /* end of k0 loop */
768 } /* end of k1 loop */
769 } /* end of k2 loop */
770
771 #pragma omp critical
772 for (index_t i=0; i<numComp; i++)
773 integrals[i]+=int_local[i];
774 } // end of parallel section
775 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
776 const double w_0 = h1*h2/4.;
777 const double w_1 = h0*h2/4.;
778 const double w_2 = h0*h1/4.;
779 #pragma omp parallel
780 {
781 vector<double> int_local(numComp, 0);
782 if (m_faceOffset[0] > -1) {
783 #pragma omp for nowait
784 for (index_t k2=0; k2 < m_NE2; ++k2) {
785 for (index_t k1=0; k1 < m_NE1; ++k1) {
786 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
787 for (index_t i=0; i < numComp; ++i) {
788 const register double f_0 = f[INDEX2(i,0,numComp)];
789 const register double f_1 = f[INDEX2(i,1,numComp)];
790 const register double f_2 = f[INDEX2(i,2,numComp)];
791 const register double f_3 = f[INDEX2(i,3,numComp)];
792 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
793 } /* end of component loop i */
794 } /* end of k1 loop */
795 } /* end of k2 loop */
796 }
797
798 if (m_faceOffset[1] > -1) {
799 #pragma omp for nowait
800 for (index_t k2=0; k2 < m_NE2; ++k2) {
801 for (index_t k1=0; k1 < m_NE1; ++k1) {
802 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
803 for (index_t i=0; i < numComp; ++i) {
804 const register double f_0 = f[INDEX2(i,0,numComp)];
805 const register double f_1 = f[INDEX2(i,1,numComp)];
806 const register double f_2 = f[INDEX2(i,2,numComp)];
807 const register double f_3 = f[INDEX2(i,3,numComp)];
808 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
809 } /* end of component loop i */
810 } /* end of k1 loop */
811 } /* end of k2 loop */
812 }
813
814 if (m_faceOffset[2] > -1) {
815 #pragma omp for nowait
816 for (index_t k2=0; k2 < m_NE2; ++k2) {
817 for (index_t k0=0; k0 < m_NE0; ++k0) {
818 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
819 for (index_t i=0; i < numComp; ++i) {
820 const register double f_0 = f[INDEX2(i,0,numComp)];
821 const register double f_1 = f[INDEX2(i,1,numComp)];
822 const register double f_2 = f[INDEX2(i,2,numComp)];
823 const register double f_3 = f[INDEX2(i,3,numComp)];
824 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
825 } /* end of component loop i */
826 } /* end of k1 loop */
827 } /* end of k2 loop */
828 }
829
830 if (m_faceOffset[3] > -1) {
831 #pragma omp for nowait
832 for (index_t k2=0; k2 < m_NE2; ++k2) {
833 for (index_t k0=0; k0 < m_NE0; ++k0) {
834 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
835 for (index_t i=0; i < numComp; ++i) {
836 const register double f_0 = f[INDEX2(i,0,numComp)];
837 const register double f_1 = f[INDEX2(i,1,numComp)];
838 const register double f_2 = f[INDEX2(i,2,numComp)];
839 const register double f_3 = f[INDEX2(i,3,numComp)];
840 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
841 } /* end of component loop i */
842 } /* end of k1 loop */
843 } /* end of k2 loop */
844 }
845
846 if (m_faceOffset[4] > -1) {
847 #pragma omp for nowait
848 for (index_t k1=0; k1 < m_NE1; ++k1) {
849 for (index_t k0=0; k0 < m_NE0; ++k0) {
850 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
851 for (index_t i=0; i < numComp; ++i) {
852 const register double f_0 = f[INDEX2(i,0,numComp)];
853 const register double f_1 = f[INDEX2(i,1,numComp)];
854 const register double f_2 = f[INDEX2(i,2,numComp)];
855 const register double f_3 = f[INDEX2(i,3,numComp)];
856 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
857 } /* end of component loop i */
858 } /* end of k1 loop */
859 } /* end of k2 loop */
860 }
861
862 if (m_faceOffset[5] > -1) {
863 #pragma omp for nowait
864 for (index_t k1=0; k1 < m_NE1; ++k1) {
865 for (index_t k0=0; k0 < m_NE0; ++k0) {
866 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
867 for (index_t i=0; i < numComp; ++i) {
868 const register double f_0 = f[INDEX2(i,0,numComp)];
869 const register double f_1 = f[INDEX2(i,1,numComp)];
870 const register double f_2 = f[INDEX2(i,2,numComp)];
871 const register double f_3 = f[INDEX2(i,3,numComp)];
872 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
873 } /* end of component loop i */
874 } /* end of k1 loop */
875 } /* end of k2 loop */
876 }
877
878 #pragma omp critical
879 for (index_t i=0; i<numComp; i++)
880 integrals[i]+=int_local[i];
881 } // end of parallel section
882
883 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
884 const double w_0 = h1*h2;
885 const double w_1 = h0*h2;
886 const double w_2 = h0*h1;
887 #pragma omp parallel
888 {
889 vector<double> int_local(numComp, 0);
890 if (m_faceOffset[0] > -1) {
891 #pragma omp for nowait
892 for (index_t k2=0; k2 < m_NE2; ++k2) {
893 for (index_t k1=0; k1 < m_NE1; ++k1) {
894 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
895 for (index_t i=0; i < numComp; ++i) {
896 int_local[i]+=f[i]*w_0;
897 } /* end of component loop i */
898 } /* end of k1 loop */
899 } /* end of k2 loop */
900 }
901
902 if (m_faceOffset[1] > -1) {
903 #pragma omp for nowait
904 for (index_t k2=0; k2 < m_NE2; ++k2) {
905 for (index_t k1=0; k1 < m_NE1; ++k1) {
906 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
907 for (index_t i=0; i < numComp; ++i) {
908 int_local[i]+=f[i]*w_0;
909 } /* end of component loop i */
910 } /* end of k1 loop */
911 } /* end of k2 loop */
912 }
913
914 if (m_faceOffset[2] > -1) {
915 #pragma omp for nowait
916 for (index_t k2=0; k2 < m_NE2; ++k2) {
917 for (index_t k0=0; k0 < m_NE0; ++k0) {
918 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
919 for (index_t i=0; i < numComp; ++i) {
920 int_local[i]+=f[i]*w_1;
921 } /* end of component loop i */
922 } /* end of k1 loop */
923 } /* end of k2 loop */
924 }
925
926 if (m_faceOffset[3] > -1) {
927 #pragma omp for nowait
928 for (index_t k2=0; k2 < m_NE2; ++k2) {
929 for (index_t k0=0; k0 < m_NE0; ++k0) {
930 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
931 for (index_t i=0; i < numComp; ++i) {
932 int_local[i]+=f[i]*w_1;
933 } /* end of component loop i */
934 } /* end of k1 loop */
935 } /* end of k2 loop */
936 }
937
938 if (m_faceOffset[4] > -1) {
939 #pragma omp for nowait
940 for (index_t k1=0; k1 < m_NE1; ++k1) {
941 for (index_t k0=0; k0 < m_NE0; ++k0) {
942 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
943 for (index_t i=0; i < numComp; ++i) {
944 int_local[i]+=f[i]*w_2;
945 } /* end of component loop i */
946 } /* end of k1 loop */
947 } /* end of k2 loop */
948 }
949
950 if (m_faceOffset[5] > -1) {
951 #pragma omp for nowait
952 for (index_t k1=0; k1 < m_NE1; ++k1) {
953 for (index_t k0=0; k0 < m_NE0; ++k0) {
954 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
955 for (index_t i=0; i < numComp; ++i) {
956 int_local[i]+=f[i]*w_2;
957 } /* end of component loop i */
958 } /* end of k1 loop */
959 } /* end of k2 loop */
960 }
961
962 #pragma omp critical
963 for (index_t i=0; i<numComp; i++)
964 integrals[i]+=int_local[i];
965 } // end of parallel section
966
967 } else {
968 stringstream msg;
969 msg << "setToIntegrals() not implemented for "
970 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
971 throw RipleyException(msg.str());
972 }
973 }
974
975 void Brick::setToNormal(escript::Data& out) const
976 {
977 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
978 #pragma omp parallel
979 {
980 if (m_faceOffset[0] > -1) {
981 #pragma omp for nowait
982 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
983 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
984 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
985 // set vector at four quadrature points
986 *o++ = -1.; *o++ = 0.; *o++ = 0.;
987 *o++ = -1.; *o++ = 0.; *o++ = 0.;
988 *o++ = -1.; *o++ = 0.; *o++ = 0.;
989 *o++ = -1.; *o++ = 0.; *o = 0.;
990 }
991 }
992 }
993
994 if (m_faceOffset[1] > -1) {
995 #pragma omp for nowait
996 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
997 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
998 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
999 // set vector at four quadrature points
1000 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1001 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1002 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1003 *o++ = 1.; *o++ = 0.; *o = 0.;
1004 }
1005 }
1006 }
1007
1008 if (m_faceOffset[2] > -1) {
1009 #pragma omp for nowait
1010 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1011 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1012 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1013 // set vector at four quadrature points
1014 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1015 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1016 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1017 *o++ = 0.; *o++ = -1.; *o = 0.;
1018 }
1019 }
1020 }
1021
1022 if (m_faceOffset[3] > -1) {
1023 #pragma omp for nowait
1024 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1025 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1026 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1027 // set vector at four quadrature points
1028 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1029 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1030 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1031 *o++ = 0.; *o++ = 1.; *o = 0.;
1032 }
1033 }
1034 }
1035
1036 if (m_faceOffset[4] > -1) {
1037 #pragma omp for nowait
1038 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1039 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1040 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1041 // set vector at four quadrature points
1042 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1043 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1044 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1045 *o++ = 0.; *o++ = 0.; *o = -1.;
1046 }
1047 }
1048 }
1049
1050 if (m_faceOffset[5] > -1) {
1051 #pragma omp for nowait
1052 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1053 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1054 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1055 // set vector at four quadrature points
1056 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1057 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1058 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1059 *o++ = 0.; *o++ = 0.; *o = 1.;
1060 }
1061 }
1062 }
1063 } // end of parallel section
1064 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1065 #pragma omp parallel
1066 {
1067 if (m_faceOffset[0] > -1) {
1068 #pragma omp for nowait
1069 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1070 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1071 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1072 *o++ = -1.;
1073 *o++ = 0.;
1074 *o = 0.;
1075 }
1076 }
1077 }
1078
1079 if (m_faceOffset[1] > -1) {
1080 #pragma omp for nowait
1081 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1082 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1083 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1084 *o++ = 1.;
1085 *o++ = 0.;
1086 *o = 0.;
1087 }
1088 }
1089 }
1090
1091 if (m_faceOffset[2] > -1) {
1092 #pragma omp for nowait
1093 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1094 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1095 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1096 *o++ = 0.;
1097 *o++ = -1.;
1098 *o = 0.;
1099 }
1100 }
1101 }
1102
1103 if (m_faceOffset[3] > -1) {
1104 #pragma omp for nowait
1105 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1106 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1107 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1108 *o++ = 0.;
1109 *o++ = 1.;
1110 *o = 0.;
1111 }
1112 }
1113 }
1114
1115 if (m_faceOffset[4] > -1) {
1116 #pragma omp for nowait
1117 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1118 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1119 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1120 *o++ = 0.;
1121 *o++ = 0.;
1122 *o = -1.;
1123 }
1124 }
1125 }
1126
1127 if (m_faceOffset[5] > -1) {
1128 #pragma omp for nowait
1129 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1130 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1131 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1132 *o++ = 0.;
1133 *o++ = 0.;
1134 *o = 1.;
1135 }
1136 }
1137 }
1138 } // end of parallel section
1139
1140 } else {
1141 stringstream msg;
1142 msg << "setToNormal() not implemented for "
1143 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1144 throw RipleyException(msg.str());
1145 }
1146 }
1147
1148 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1149 bool reducedColOrder) const
1150 {
1151 if (reducedRowOrder || reducedColOrder)
1152 throw RipleyException("getPattern() not implemented for reduced order");
1153
1154 throw RipleyException("getPattern() not implemented");
1155 }
1156
1157 void Brick::Print_Mesh_Info(const bool full) const
1158 {
1159 RipleyDomain::Print_Mesh_Info(full);
1160 if (full) {
1161 cout << " Id Coordinates" << endl;
1162 cout.precision(15);
1163 cout.setf(ios::scientific, ios::floatfield);
1164 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1165 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1166 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1167 for (index_t i=0; i < getNumNodes(); i++) {
1168 cout << " " << setw(5) << m_nodeId[i]
1169 << " " << xdx.first+(i%m_N0)*xdx.second
1170 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1171 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1172 }
1173 }
1174 }
1175
1176 IndexVector Brick::getNumNodesPerDim() const
1177 {
1178 IndexVector ret;
1179 ret.push_back(m_N0);
1180 ret.push_back(m_N1);
1181 ret.push_back(m_N2);
1182 return ret;
1183 }
1184
1185 IndexVector Brick::getNumElementsPerDim() const
1186 {
1187 IndexVector ret;
1188 ret.push_back(m_NE0);
1189 ret.push_back(m_NE1);
1190 ret.push_back(m_NE2);
1191 return ret;
1192 }
1193
1194 IndexVector Brick::getNumFacesPerBoundary() const
1195 {
1196 IndexVector ret(6, 0);
1197 //left
1198 if (m_offset0==0)
1199 ret[0]=m_NE1*m_NE2;
1200 //right
1201 if (m_mpiInfo->rank%m_NX==m_NX-1)
1202 ret[1]=m_NE1*m_NE2;
1203 //bottom
1204 if (m_offset1==0)
1205 ret[2]=m_NE0*m_NE2;
1206 //top
1207 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1208 ret[3]=m_NE0*m_NE2;
1209 //front
1210 if (m_offset2==0)
1211 ret[4]=m_NE0*m_NE1;
1212 //back
1213 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1214 ret[5]=m_NE0*m_NE1;
1215 return ret;
1216 }
1217
1218 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1219 {
1220 if (dim==0)
1221 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1222 else if (dim==1)
1223 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1224 else if (dim==2)
1225 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1226
1227 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1228 }
1229
1230 //protected
1231 dim_t Brick::getNumDOF() const
1232 {
1233 return m_nodeDistribution[m_mpiInfo->rank+1]
1234 -m_nodeDistribution[m_mpiInfo->rank];
1235 }
1236
1237 //protected
1238 dim_t Brick::getNumFaceElements() const
1239 {
1240 dim_t n=0;
1241 //left
1242 if (m_offset0==0)
1243 n+=m_NE1*m_NE2;
1244 //right
1245 if (m_mpiInfo->rank%m_NX==m_NX-1)
1246 n+=m_NE1*m_NE2;
1247 //bottom
1248 if (m_offset1==0)
1249 n+=m_NE0*m_NE2;
1250 //top
1251 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1252 n+=m_NE0*m_NE2;
1253 //front
1254 if (m_offset2==0)
1255 n+=m_NE0*m_NE1;
1256 //back
1257 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1258 n+=m_NE0*m_NE1;
1259
1260 return n;
1261 }
1262
1263 //protected
1264 void Brick::assembleCoordinates(escript::Data& arg) const
1265 {
1266 escriptDataC x = arg.getDataC();
1267 int numDim = m_numDim;
1268 if (!isDataPointShapeEqual(&x, 1, &numDim))
1269 throw RipleyException("setToX: Invalid Data object shape");
1270 if (!numSamplesEqual(&x, 1, getNumNodes()))
1271 throw RipleyException("setToX: Illegal number of samples in Data object");
1272
1273 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1274 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1275 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1276 arg.requireWrite();
1277 #pragma omp parallel for
1278 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1279 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1280 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1281 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1282 point[0] = xdx.first+i0*xdx.second;
1283 point[1] = ydy.first+i1*ydy.second;
1284 point[2] = zdz.first+i2*zdz.second;
1285 }
1286 }
1287 }
1288 }
1289
1290 //private
1291 void Brick::populateSampleIds()
1292 {
1293 // identifiers are ordered from left to right, bottom to top, front to back
1294 // on each rank, except for the shared nodes which are owned by the rank
1295 // below / to the left / to the front of the current rank
1296
1297 // build node distribution vector first.
1298 // m_nodeDistribution[i] is the first node id on rank i, that is
1299 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1300 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1301 m_nodeDistribution[1]=getNumNodes();
1302 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1303 const index_t x = k%m_NX;
1304 const index_t y = k%(m_NX*m_NY)/m_NX;
1305 const index_t z = k/(m_NX*m_NY);
1306 index_t numNodes=getNumNodes();
1307 if (x>0)
1308 numNodes-=m_N1*m_N2;
1309 if (y>0)
1310 numNodes-=m_N0*m_N2;
1311 if (z>0)
1312 numNodes-=m_N0*m_N1;
1313 // if an edge was subtracted twice add it back
1314 if (x>0 && y>0)
1315 numNodes+=m_N2;
1316 if (x>0 && z>0)
1317 numNodes+=m_N1;
1318 if (y>0 && z>0)
1319 numNodes+=m_N0;
1320 // the corner node was removed 3x and added back 3x, so subtract it
1321 if (x>0 && y>0 && z>0)
1322 numNodes--;
1323 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1324 }
1325 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1326
1327 m_nodeId.resize(getNumNodes());
1328
1329 // the bottom, left and front planes are not owned by this rank so the
1330 // identifiers need to be computed accordingly
1331 const index_t left = (m_offset0==0 ? 0 : 1);
1332 const index_t bottom = (m_offset1==0 ? 0 : 1);
1333 const index_t front = (m_offset2==0 ? 0 : 1);
1334
1335 // case 1: all nodes on left plane are owned by rank on the left
1336 if (left>0) {
1337 const int neighbour=m_mpiInfo->rank-1;
1338 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1339 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1340 #pragma omp parallel for
1341 for (dim_t i2=front; i2<m_N2; i2++) {
1342 for (dim_t i1=bottom; i1<m_N1; i1++) {
1343 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1344 + (i1-bottom+1)*leftN0
1345 + (i2-front)*leftN0*leftN1 - 1;
1346 }
1347 }
1348 }
1349 // case 2: all nodes on bottom plane are owned by rank below
1350 if (bottom>0) {
1351 const int neighbour=m_mpiInfo->rank-m_NX;
1352 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1353 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1354 #pragma omp parallel for
1355 for (dim_t i2=front; i2<m_N2; i2++) {
1356 for (dim_t i0=left; i0<m_N0; i0++) {
1357 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1358 + bottomN0*(bottomN1-1)
1359 + (i2-front)*bottomN0*bottomN1 + i0-left;
1360 }
1361 }
1362 }
1363 // case 3: all nodes on front plane are owned by rank in front
1364 if (front>0) {
1365 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1366 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1367 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1368 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1369 #pragma omp parallel for
1370 for (dim_t i1=bottom; i1<m_N1; i1++) {
1371 for (dim_t i0=left; i0<m_N0; i0++) {
1372 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1373 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1374 }
1375 }
1376 }
1377 // case 4: nodes on front bottom edge are owned by the corresponding rank
1378 if (front>0 && bottom>0) {
1379 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1380 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1381 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1382 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1383 #pragma omp parallel for
1384 for (dim_t i0=left; i0<m_N0; i0++) {
1385 m_nodeId[i0]=m_nodeDistribution[neighbour]
1386 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1387 }
1388 }
1389 // case 5: nodes on left bottom edge are owned by the corresponding rank
1390 if (left>0 && bottom>0) {
1391 const int neighbour=m_mpiInfo->rank-m_NX-1;
1392 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1393 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1394 #pragma omp parallel for
1395 for (dim_t i2=front; i2<m_N2; i2++) {
1396 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1397 + (1+i2-front)*N0*N1-1;
1398 }
1399 }
1400 // case 6: nodes on left front edge are owned by the corresponding rank
1401 if (left>0 && front>0) {
1402 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1403 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1404 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1405 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1406 #pragma omp parallel for
1407 for (dim_t i1=bottom; i1<m_N1; i1++) {
1408 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1409 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1410 }
1411 }
1412 // case 7: bottom-left-front corner node owned by corresponding rank
1413 if (left>0 && bottom>0 && front>0) {
1414 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1415 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1416 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1417 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1418 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1419 }
1420
1421 // the rest of the id's are contiguous
1422 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1423 #pragma omp parallel for
1424 for (dim_t i2=front; i2<m_N2; i2++) {
1425 for (dim_t i1=bottom; i1<m_N1; i1++) {
1426 for (dim_t i0=left; i0<m_N0; i0++) {
1427 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1428 +(i1-bottom)*(m_N0-left)
1429 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1430 }
1431 }
1432 }
1433 m_nodeTags.assign(getNumNodes(), 0);
1434 updateTagsInUse(Nodes);
1435
1436 // elements
1437 m_elementId.resize(getNumElements());
1438 #pragma omp parallel for
1439 for (dim_t k=0; k<getNumElements(); k++) {
1440 m_elementId[k]=k;
1441 }
1442 m_elementTags.assign(getNumElements(), 0);
1443 updateTagsInUse(Elements);
1444
1445 // face elements
1446 m_faceId.resize(getNumFaceElements());
1447 #pragma omp parallel for
1448 for (dim_t k=0; k<getNumFaceElements(); k++) {
1449 m_faceId[k]=k;
1450 }
1451
1452 // generate face offset vector and set face tags
1453 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1454 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1455 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1456 m_faceOffset.assign(facesPerEdge.size(), -1);
1457 m_faceTags.clear();
1458 index_t offset=0;
1459 for (size_t i=0; i<facesPerEdge.size(); i++) {
1460 if (facesPerEdge[i]>0) {
1461 m_faceOffset[i]=offset;
1462 offset+=facesPerEdge[i];
1463 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1464 }
1465 }
1466 setTagMap("left", LEFT);
1467 setTagMap("right", RIGHT);
1468 setTagMap("bottom", BOTTOM);
1469 setTagMap("top", TOP);
1470 setTagMap("front", FRONT);
1471 setTagMap("back", BACK);
1472 updateTagsInUse(FaceElements);
1473 }
1474
1475 //protected
1476 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1477 bool reduced) const
1478 {
1479 const dim_t numComp = in.getDataPointSize();
1480 if (reduced) {
1481 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1482 const double c0 = .125;
1483 #pragma omp parallel for
1484 for (index_t k2=0; k2 < m_NE2; ++k2) {
1485 for (index_t k1=0; k1 < m_NE1; ++k1) {
1486 for (index_t k0=0; k0 < m_NE0; ++k0) {
1487 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1488 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1489 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1490 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1491 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1492 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1493 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1494 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1495 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1496 for (index_t i=0; i < numComp; ++i) {
1497 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1498 } /* end of component loop i */
1499 } /* end of k0 loop */
1500 } /* end of k1 loop */
1501 } /* end of k2 loop */
1502 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1503 } else {
1504 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1505 const double c0 = .0094373878376559314545;
1506 const double c1 = .035220810900864519624;
1507 const double c2 = .13144585576580214704;
1508 const double c3 = .49056261216234406855;
1509 #pragma omp parallel for
1510 for (index_t k2=0; k2 < m_NE2; ++k2) {
1511 for (index_t k1=0; k1 < m_NE1; ++k1) {
1512 for (index_t k0=0; k0 < m_NE0; ++k0) {
1513 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1514 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1515 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1516 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1517 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1518 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1519 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1520 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1521 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1522 for (index_t i=0; i < numComp; ++i) {
1523 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1524 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1525 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1526 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1527 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1528 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1529 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1530 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1531 } /* end of component loop i */
1532 } /* end of k0 loop */
1533 } /* end of k1 loop */
1534 } /* end of k2 loop */
1535 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1536 }
1537 }
1538
1539 //protected
1540 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1541 bool reduced) const
1542 {
1543 const dim_t numComp = in.getDataPointSize();
1544 if (reduced) {
1545 const double c0 = .25;
1546 #pragma omp parallel
1547 {
1548 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1549 if (m_faceOffset[0] > -1) {
1550 #pragma omp for nowait
1551 for (index_t k2=0; k2 < m_NE2; ++k2) {
1552 for (index_t k1=0; k1 < m_NE1; ++k1) {
1553 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1554 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1555 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1556 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1557 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1558 for (index_t i=0; i < numComp; ++i) {
1559 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1560 } /* end of component loop i */
1561 } /* end of k1 loop */
1562 } /* end of k2 loop */
1563 } /* end of face 0 */
1564 if (m_faceOffset[1] > -1) {
1565 #pragma omp for nowait
1566 for (index_t k2=0; k2 < m_NE2; ++k2) {
1567 for (index_t k1=0; k1 < m_NE1; ++k1) {
1568 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1569 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1570 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1571 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1572 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1573 for (index_t i=0; i < numComp; ++i) {
1574 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1575 } /* end of component loop i */
1576 } /* end of k1 loop */
1577 } /* end of k2 loop */
1578 } /* end of face 1 */
1579 if (m_faceOffset[2] > -1) {
1580 #pragma omp for nowait
1581 for (index_t k2=0; k2 < m_NE2; ++k2) {
1582 for (index_t k0=0; k0 < m_NE0; ++k0) {
1583 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1584 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1585 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1586 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1587 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1588 for (index_t i=0; i < numComp; ++i) {
1589 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1590 } /* end of component loop i */
1591 } /* end of k0 loop */
1592 } /* end of k2 loop */
1593 } /* end of face 2 */
1594 if (m_faceOffset[3] > -1) {
1595 #pragma omp for nowait
1596 for (index_t k2=0; k2 < m_NE2; ++k2) {
1597 for (index_t k0=0; k0 < m_NE0; ++k0) {
1598 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1599 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1600 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1601 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1602 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1603 for (index_t i=0; i < numComp; ++i) {
1604 o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1605 } /* end of component loop i */
1606 } /* end of k0 loop */
1607 } /* end of k2 loop */
1608 } /* end of face 3 */
1609 if (m_faceOffset[4] > -1) {
1610 #pragma omp for nowait
1611 for (index_t k1=0; k1 < m_NE1; ++k1) {
1612 for (index_t k0=0; k0 < m_NE0; ++k0) {
1613 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1614 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1615 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1616 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1617 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1618 for (index_t i=0; i < numComp; ++i) {
1619 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1620 } /* end of component loop i */
1621 } /* end of k0 loop */
1622 } /* end of k1 loop */
1623 } /* end of face 4 */
1624 if (m_faceOffset[5] > -1) {
1625 #pragma omp for nowait
1626 for (index_t k1=0; k1 < m_NE1; ++k1) {
1627 for (index_t k0=0; k0 < m_NE0; ++k0) {
1628 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1629 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1630 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1631 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1632 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1633 for (index_t i=0; i < numComp; ++i) {
1634 o[INDEX2(i,numComp,0)] = c0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1635 } /* end of component loop i */
1636 } /* end of k0 loop */
1637 } /* end of k1 loop */
1638 } /* end of face 5 */
1639 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1640 } // end of parallel section
1641 } else {
1642 const double c0 = 0.044658198738520451079;
1643 const double c1 = 0.16666666666666666667;
1644 const double c2 = 0.62200846792814621559;
1645 #pragma omp parallel
1646 {
1647 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1648 if (m_faceOffset[0] > -1) {
1649 #pragma omp for nowait
1650 for (index_t k2=0; k2 < m_NE2; ++k2) {
1651 for (index_t k1=0; k1 < m_NE1; ++k1) {
1652 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1653 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1654 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1655 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1656 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1657 for (index_t i=0; i < numComp; ++i) {
1658 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_011[i]*c0 + c1*(f_001[i] + f_010[i]);
1659 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_010[i]*c2 + c1*(f_000[i] + f_011[i]);
1660 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_010[i]*c0 + c1*(f_000[i] + f_011[i]);
1661 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_011[i]*c2 + c1*(f_001[i] + f_010[i]);
1662 } /* end of component loop i */
1663 } /* end of k1 loop */
1664 } /* end of k2 loop */
1665 } /* end of face 0 */
1666 if (m_faceOffset[1] > -1) {
1667 #pragma omp for nowait
1668 for (index_t k2=0; k2 < m_NE2; ++k2) {
1669 for (index_t k1=0; k1 < m_NE1; ++k1) {
1670 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1671 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1672 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1673 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1674 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1675 for (index_t i=0; i < numComp; ++i) {
1676 o[INDEX2(i,numComp,0)] = f_100[i]*c2 + f_111[i]*c0 + c1*(f_101[i] + f_110[i]);
1677 o[INDEX2(i,numComp,1)] = f_101[i]*c0 + f_110[i]*c2 + c1*(f_100[i] + f_111[i]);
1678 o[INDEX2(i,numComp,2)] = f_101[i]*c2 + f_110[i]*c0 + c1*(f_100[i] + f_111[i]);
1679 o[INDEX2(i,numComp,3)] = f_100[i]*c0 + f_111[i]*c2 + c1*(f_101[i] + f_110[i]);
1680 } /* end of component loop i */
1681 } /* end of k1 loop */
1682 } /* end of k2 loop */
1683 } /* end of face 1 */
1684 if (m_faceOffset[2] > -1) {
1685 #pragma omp for nowait
1686 for (index_t k2=0; k2 < m_NE2; ++k2) {
1687 for (index_t k0=0; k0 < m_NE0; ++k0) {
1688 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1689 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1690 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1691 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1692 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1693 for (index_t i=0; i < numComp; ++i) {
1694 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_100[i]);
1695 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_101[i]);
1696 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_101[i]);
1697 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_100[i]);
1698 } /* end of component loop i */
1699 } /* end of k0 loop */
1700 } /* end of k2 loop */
1701 } /* end of face 2 */
1702 if (m_faceOffset[3] > -1) {
1703 #pragma omp for nowait
1704 for (index_t k2=0; k2 < m_NE2; ++k2) {
1705 for (index_t k0=0; k0 < m_NE0; ++k0) {
1706 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1707 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1708 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1709 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1710 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1711 for (index_t i=0; i < numComp; ++i) {
1712 o[INDEX2(i,numComp,0)] = f_010[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_110[i]);
1713 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_111[i]);
1714 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_111[i]);
1715 o[INDEX2(i,numComp,3)] = f_010[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_110[i]);
1716 } /* end of component loop i */
1717 } /* end of k0 loop */
1718 } /* end of k2 loop */
1719 } /* end of face 3 */
1720 if (m_faceOffset[4] > -1) {
1721 #pragma omp for nowait
1722 for (index_t k1=0; k1 < m_NE1; ++k1) {
1723 for (index_t k0=0; k0 < m_NE0; ++k0) {
1724 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1725 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1726 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1727 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1728 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1729 for (index_t i=0; i < numComp; ++i) {
1730 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_100[i]);
1731 o[INDEX2(i,numComp,1)] = f_010[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_110[i]);
1732 o[INDEX2(i,numComp,2)] = f_010[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_110[i]);
1733 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_100[i]);
1734 } /* end of component loop i */
1735 } /* end of k0 loop */
1736 } /* end of k1 loop */
1737 } /* end of face 4 */
1738 if (m_faceOffset[5] > -1) {
1739 #pragma omp for nowait
1740 for (index_t k1=0; k1 < m_NE1; ++k1) {
1741 for (index_t k0=0; k0 < m_NE0; ++k0) {
1742 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1743 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1744 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1745 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1746 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1747 for (index_t i=0; i < numComp; ++i) {
1748 o[INDEX2(i,numComp,0)] = f_001[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_101[i]);
1749 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_111[i]);
1750 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_111[i]);
1751 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_101[i]);
1752 } /* end of component loop i */
1753 } /* end of k0 loop */
1754 } /* end of k1 loop */
1755 } /* end of face 5 */
1756 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1757 } // end of parallel section
1758 }
1759 }
1760
1761 } // end of namespace ripley
1762

  ViewVC Help
Powered by ViewVC 1.1.26