/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3707 - (show annotations)
Mon Dec 5 05:48:25 2011 UTC (7 years, 4 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 73017 byte(s)
Fixes to the face element code.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 if (dynamic_cast<const Brick*>(&other))
81 return this==&other;
82
83 return false;
84 }
85
86 void Brick::dump(const string& fileName) const
87 {
88 #if USE_SILO
89 string fn(fileName);
90 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91 fn+=".silo";
92 }
93
94 const int NUM_SILO_FILES = 1;
95 const char* blockDirFmt = "/block%04d";
96 int driver=DB_HDF5;
97 string siloPath;
98 DBfile* dbfile = NULL;
99
100 #ifdef ESYS_MPI
101 PMPIO_baton_t* baton = NULL;
102 #endif
103
104 if (m_mpiInfo->size > 1) {
105 #ifdef ESYS_MPI
106 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108 PMPIO_DefaultClose, (void*)&driver);
109 // try the fallback driver in case of error
110 if (!baton && driver != DB_PDB) {
111 driver = DB_PDB;
112 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114 PMPIO_DefaultClose, (void*)&driver);
115 }
116 if (baton) {
117 char str[64];
118 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119 siloPath = str;
120 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121 }
122 #endif
123 } else {
124 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125 getDescription().c_str(), driver);
126 // try the fallback driver in case of error
127 if (!dbfile && driver != DB_PDB) {
128 driver = DB_PDB;
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 }
132 }
133
134 if (!dbfile)
135 throw RipleyException("dump: Could not create Silo file");
136
137 /*
138 if (driver==DB_HDF5) {
139 // gzip level 1 already provides good compression with minimal
140 // performance penalty. Some tests showed that gzip levels >3 performed
141 // rather badly on escript data both in terms of time and space
142 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143 }
144 */
145
146 boost::scoped_ptr<double> x(new double[m_N0]);
147 boost::scoped_ptr<double> y(new double[m_N1]);
148 boost::scoped_ptr<double> z(new double[m_N2]);
149 double* coords[3] = { x.get(), y.get(), z.get() };
150 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151 pair<double,double> ydy = getFirstCoordAndSpacing(1);
152 pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 #pragma omp parallel
154 {
155 #pragma omp for
156 for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 coords[0][i0]=xdx.first+i0*xdx.second;
158 }
159 #pragma omp for
160 for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 coords[1][i1]=ydy.first+i1*ydy.second;
162 }
163 #pragma omp for
164 for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 coords[2][i2]=zdz.first+i2*zdz.second;
166 }
167 }
168 IndexVector dims = getNumNodesPerDim();
169 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 DB_COLLINEAR, NULL);
171
172 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173 NULL, 0, DB_INT, DB_NODECENT, NULL);
174
175 // write element ids
176 dims = getNumElementsPerDim();
177 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179
180 // rank 0 writes multimesh and multivar
181 if (m_mpiInfo->rank == 0) {
182 vector<string> tempstrings;
183 vector<char*> names;
184 for (dim_t i=0; i<m_mpiInfo->size; i++) {
185 stringstream path;
186 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187 tempstrings.push_back(path.str());
188 names.push_back((char*)tempstrings.back().c_str());
189 }
190 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191 DBSetDir(dbfile, "/");
192 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 types.assign(m_mpiInfo->size, DB_QUADVAR);
203 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204 &types[0], NULL);
205 tempstrings.clear();
206 names.clear();
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214 &types[0], NULL);
215 }
216
217 if (m_mpiInfo->size > 1) {
218 #ifdef ESYS_MPI
219 PMPIO_HandOffBaton(baton, dbfile);
220 PMPIO_Finish(baton);
221 #endif
222 } else {
223 DBClose(dbfile);
224 }
225
226 #else // USE_SILO
227 throw RipleyException("dump(): no Silo support");
228 #endif
229 }
230
231 const int* Brick::borrowSampleReferenceIDs(int fsType) const
232 {
233 switch (fsType) {
234 case Nodes:
235 return &m_nodeId[0];
236 case Elements:
237 return &m_elementId[0];
238 case FaceElements:
239 return &m_faceId[0];
240 default:
241 break;
242 }
243
244 stringstream msg;
245 msg << "borrowSampleReferenceIDs() not implemented for function space type "
246 << fsType;
247 throw RipleyException(msg.str());
248 }
249
250 bool Brick::ownSample(int fsCode, index_t id) const
251 {
252 #ifdef ESYS_MPI
253 if (fsCode == Nodes) {
254 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257 } else
258 throw RipleyException("ownSample() only implemented for Nodes");
259 #else
260 return true;
261 #endif
262 }
263
264 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265 {
266 escript::Data& in = *const_cast<escript::Data*>(&cIn);
267 const dim_t numComp = in.getDataPointSize();
268 const double h0 = m_l0/m_gNE0;
269 const double h1 = m_l1/m_gNE1;
270 const double h2 = m_l1/m_gNE2;
271 if (out.getFunctionSpace().getTypeCode() == Elements) {
272 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
273 const double tmp0_22 = -0.044658198738520451079/h1;
274 const double tmp0_16 = 0.16666666666666666667/h0;
275 const double tmp0_33 = -0.62200846792814621559/h1;
276 const double tmp0_0 = -0.62200846792814621559/h0;
277 const double tmp0_21 = -0.16666666666666666667/h1;
278 const double tmp0_17 = 0.62200846792814621559/h0;
279 const double tmp0_52 = -0.044658198738520451079/h2;
280 const double tmp0_1 = -0.16666666666666666667/h0;
281 const double tmp0_20 = -0.62200846792814621559/h1;
282 const double tmp0_14 = -0.044658198738520451079/h0;
283 const double tmp0_53 = -0.62200846792814621559/h2;
284 const double tmp0_49 = 0.16666666666666666667/h2;
285 const double tmp0_2 = 0.16666666666666666667/h0;
286 const double tmp0_27 = -0.044658198738520451079/h1;
287 const double tmp0_15 = -0.16666666666666666667/h0;
288 const double tmp0_50 = -0.16666666666666666667/h2;
289 const double tmp0_48 = 0.62200846792814621559/h2;
290 const double tmp0_3 = 0.044658198738520451079/h0;
291 const double tmp0_26 = -0.16666666666666666667/h1;
292 const double tmp0_12 = -0.62200846792814621559/h0;
293 const double tmp0_51 = 0.044658198738520451079/h2;
294 const double tmp0_25 = 0.62200846792814621559/h1;
295 const double tmp0_13 = 0.16666666666666666667/h0;
296 const double tmp0_56 = 0.16666666666666666667/h2;
297 const double tmp0_24 = 0.16666666666666666667/h1;
298 const double tmp0_10 = 0.62200846792814621559/h0;
299 const double tmp0_57 = 0.62200846792814621559/h2;
300 const double tmp0_11 = -0.16666666666666666667/h0;
301 const double tmp0_54 = -0.044658198738520451079/h2;
302 const double tmp0_38 = 0.16666666666666666667/h1;
303 const double tmp0_34 = -0.044658198738520451079/h1;
304 const double tmp0_42 = 0.16666666666666666667/h2;
305 const double tmp0_35 = -0.16666666666666666667/h1;
306 const double tmp0_36 = -0.62200846792814621559/h1;
307 const double tmp0_41 = 0.62200846792814621559/h2;
308 const double tmp0_8 = 0.044658198738520451079/h0;
309 const double tmp0_37 = 0.62200846792814621559/h1;
310 const double tmp0_29 = 0.16666666666666666667/h1;
311 const double tmp0_40 = -0.62200846792814621559/h2;
312 const double tmp0_9 = 0.16666666666666666667/h0;
313 const double tmp0_30 = 0.62200846792814621559/h1;
314 const double tmp0_28 = -0.16666666666666666667/h1;
315 const double tmp0_43 = 0.044658198738520451079/h2;
316 const double tmp0_32 = 0.16666666666666666667/h1;
317 const double tmp0_31 = 0.044658198738520451079/h1;
318 const double tmp0_39 = 0.044658198738520451079/h1;
319 const double tmp0_58 = -0.62200846792814621559/h2;
320 const double tmp0_55 = 0.044658198738520451079/h2;
321 const double tmp0_18 = -0.62200846792814621559/h0;
322 const double tmp0_45 = -0.16666666666666666667/h2;
323 const double tmp0_59 = -0.16666666666666666667/h2;
324 const double tmp0_4 = -0.044658198738520451079/h0;
325 const double tmp0_19 = 0.044658198738520451079/h0;
326 const double tmp0_44 = -0.044658198738520451079/h2;
327 const double tmp0_5 = 0.62200846792814621559/h0;
328 const double tmp0_47 = 0.16666666666666666667/h2;
329 const double tmp0_6 = -0.16666666666666666667/h0;
330 const double tmp0_23 = 0.044658198738520451079/h1;
331 const double tmp0_46 = -0.16666666666666666667/h2;
332 const double tmp0_7 = -0.044658198738520451079/h0;
333 #pragma omp parallel for
334 for (index_t k2=0; k2 < m_NE2; ++k2) {
335 for (index_t k1=0; k1 < m_NE1; ++k1) {
336 for (index_t k0=0; k0 < m_NE0; ++k0) {
337 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
338 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
339 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
340 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
341 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
342 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
343 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
344 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
345 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
346 for (index_t i=0; i < numComp; ++i) {
347 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
348 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
349 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
350 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
351 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
352 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
353 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
354 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
355 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
356 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
357 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
358 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
359 o[INDEX3(i,0,4,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
360 o[INDEX3(i,1,4,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
361 o[INDEX3(i,2,4,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
362 o[INDEX3(i,0,5,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
363 o[INDEX3(i,1,5,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
364 o[INDEX3(i,2,5,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
365 o[INDEX3(i,0,6,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
366 o[INDEX3(i,1,6,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
367 o[INDEX3(i,2,6,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
368 o[INDEX3(i,0,7,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
369 o[INDEX3(i,1,7,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
370 o[INDEX3(i,2,7,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
371 } /* end of component loop i */
372 } /* end of k0 loop */
373 } /* end of k1 loop */
374 } /* end of k2 loop */
375 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
377 /* GENERATOR SNIP_GRAD_FACES TOP */
378 if (m_faceOffset[0] > -1) {
379 const double tmp0_22 = 0.21132486540518711775/h1;
380 const double tmp0_16 = 0.16666666666666666667/h0;
381 const double tmp0_33 = 0.21132486540518711775/h2;
382 const double tmp0_0 = -0.62200846792814621559/h0;
383 const double tmp0_21 = -0.21132486540518711775/h1;
384 const double tmp0_17 = 0.62200846792814621559/h0;
385 const double tmp0_1 = -0.16666666666666666667/h0;
386 const double tmp0_20 = -0.78867513459481288225/h1;
387 const double tmp0_14 = -0.044658198738520451079/h0;
388 const double tmp0_2 = 0.16666666666666666667/h0;
389 const double tmp0_27 = 0.21132486540518711775/h1;
390 const double tmp0_15 = -0.16666666666666666667/h0;
391 const double tmp0_3 = 0.044658198738520451079/h0;
392 const double tmp0_26 = 0.78867513459481288225/h1;
393 const double tmp0_12 = -0.62200846792814621559/h0;
394 const double tmp0_25 = -0.78867513459481288225/h1;
395 const double tmp0_13 = 0.16666666666666666667/h0;
396 const double tmp0_24 = -0.21132486540518711775/h1;
397 const double tmp0_10 = 0.62200846792814621559/h0;
398 const double tmp0_11 = -0.16666666666666666667/h0;
399 const double tmp0_34 = 0.78867513459481288225/h2;
400 const double tmp0_35 = -0.78867513459481288225/h2;
401 const double tmp0_8 = 0.044658198738520451079/h0;
402 const double tmp0_29 = 0.78867513459481288225/h2;
403 const double tmp0_9 = 0.16666666666666666667/h0;
404 const double tmp0_30 = 0.21132486540518711775/h2;
405 const double tmp0_28 = -0.78867513459481288225/h2;
406 const double tmp0_32 = -0.21132486540518711775/h2;
407 const double tmp0_31 = -0.21132486540518711775/h2;
408 const double tmp0_18 = -0.62200846792814621559/h0;
409 const double tmp0_4 = -0.044658198738520451079/h0;
410 const double tmp0_19 = 0.044658198738520451079/h0;
411 const double tmp0_5 = 0.62200846792814621559/h0;
412 const double tmp0_6 = -0.16666666666666666667/h0;
413 const double tmp0_23 = 0.78867513459481288225/h1;
414 const double tmp0_7 = -0.044658198738520451079/h0;
415 #pragma omp parallel for
416 for (index_t k2=0; k2 < m_NE2; ++k2) {
417 for (index_t k1=0; k1 < m_NE1; ++k1) {
418 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
419 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
420 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
421 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
422 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
423 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
424 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
425 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
426 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
427 for (index_t i=0; i < numComp; ++i) {
428 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
429 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
430 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
431 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
432 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
433 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
434 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
435 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
436 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
437 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
438 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
439 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
440 } /* end of component loop i */
441 } /* end of k1 loop */
442 } /* end of k2 loop */
443 } /* end of face 0 */
444 if (m_faceOffset[1] > -1) {
445 const double tmp0_22 = 0.78867513459481288225/h1;
446 const double tmp0_16 = 0.16666666666666666667/h0;
447 const double tmp0_33 = 0.78867513459481288225/h2;
448 const double tmp0_0 = -0.62200846792814621559/h0;
449 const double tmp0_21 = 0.21132486540518711775/h1;
450 const double tmp0_17 = 0.62200846792814621559/h0;
451 const double tmp0_1 = -0.16666666666666666667/h0;
452 const double tmp0_20 = -0.21132486540518711775/h1;
453 const double tmp0_14 = -0.044658198738520451079/h0;
454 const double tmp0_2 = 0.16666666666666666667/h0;
455 const double tmp0_27 = -0.21132486540518711775/h1;
456 const double tmp0_15 = -0.16666666666666666667/h0;
457 const double tmp0_3 = 0.044658198738520451079/h0;
458 const double tmp0_26 = 0.21132486540518711775/h1;
459 const double tmp0_12 = -0.62200846792814621559/h0;
460 const double tmp0_25 = 0.78867513459481288225/h1;
461 const double tmp0_13 = 0.16666666666666666667/h0;
462 const double tmp0_24 = -0.78867513459481288225/h1;
463 const double tmp0_10 = 0.62200846792814621559/h0;
464 const double tmp0_11 = -0.16666666666666666667/h0;
465 const double tmp0_34 = -0.78867513459481288225/h2;
466 const double tmp0_35 = -0.21132486540518711775/h2;
467 const double tmp0_8 = 0.044658198738520451079/h0;
468 const double tmp0_29 = 0.21132486540518711775/h2;
469 const double tmp0_9 = 0.16666666666666666667/h0;
470 const double tmp0_30 = -0.21132486540518711775/h2;
471 const double tmp0_28 = 0.78867513459481288225/h2;
472 const double tmp0_32 = 0.21132486540518711775/h2;
473 const double tmp0_31 = -0.78867513459481288225/h2;
474 const double tmp0_18 = -0.62200846792814621559/h0;
475 const double tmp0_4 = -0.044658198738520451079/h0;
476 const double tmp0_19 = 0.044658198738520451079/h0;
477 const double tmp0_5 = 0.62200846792814621559/h0;
478 const double tmp0_6 = -0.16666666666666666667/h0;
479 const double tmp0_23 = -0.78867513459481288225/h1;
480 const double tmp0_7 = -0.044658198738520451079/h0;
481 #pragma omp parallel for
482 for (index_t k2=0; k2 < m_NE2; ++k2) {
483 for (index_t k1=0; k1 < m_NE1; ++k1) {
484 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
485 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
486 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
487 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
488 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
489 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
490 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
491 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
492 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
493 for (index_t i=0; i < numComp; ++i) {
494 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
495 o[INDEX3(i,1,0,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
496 o[INDEX3(i,2,0,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
497 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
498 o[INDEX3(i,1,1,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
499 o[INDEX3(i,2,1,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
500 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
501 o[INDEX3(i,1,2,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
502 o[INDEX3(i,2,2,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
503 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
504 o[INDEX3(i,1,3,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
505 o[INDEX3(i,2,3,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
506 } /* end of component loop i */
507 } /* end of k1 loop */
508 } /* end of k2 loop */
509 } /* end of face 1 */
510 if (m_faceOffset[2] > -1) {
511 const double tmp0_22 = -0.044658198738520451079/h1;
512 const double tmp0_16 = -0.16666666666666666667/h1;
513 const double tmp0_33 = 0.21132486540518711775/h2;
514 const double tmp0_0 = -0.78867513459481288225/h0;
515 const double tmp0_21 = 0.16666666666666666667/h1;
516 const double tmp0_17 = -0.62200846792814621559/h1;
517 const double tmp0_1 = -0.21132486540518711775/h0;
518 const double tmp0_20 = 0.044658198738520451079/h1;
519 const double tmp0_14 = -0.16666666666666666667/h1;
520 const double tmp0_2 = 0.21132486540518711775/h0;
521 const double tmp0_27 = 0.044658198738520451079/h1;
522 const double tmp0_15 = -0.044658198738520451079/h1;
523 const double tmp0_3 = 0.78867513459481288225/h0;
524 const double tmp0_26 = 0.16666666666666666667/h1;
525 const double tmp0_12 = 0.16666666666666666667/h1;
526 const double tmp0_25 = 0.62200846792814621559/h1;
527 const double tmp0_13 = 0.62200846792814621559/h1;
528 const double tmp0_24 = -0.62200846792814621559/h1;
529 const double tmp0_10 = -0.044658198738520451079/h1;
530 const double tmp0_11 = 0.044658198738520451079/h1;
531 const double tmp0_34 = 0.78867513459481288225/h2;
532 const double tmp0_35 = -0.78867513459481288225/h2;
533 const double tmp0_8 = -0.62200846792814621559/h1;
534 const double tmp0_29 = 0.78867513459481288225/h2;
535 const double tmp0_9 = -0.16666666666666666667/h1;
536 const double tmp0_30 = 0.21132486540518711775/h2;
537 const double tmp0_28 = -0.78867513459481288225/h2;
538 const double tmp0_32 = -0.21132486540518711775/h2;
539 const double tmp0_31 = -0.21132486540518711775/h2;
540 const double tmp0_18 = 0.16666666666666666667/h1;
541 const double tmp0_4 = -0.21132486540518711775/h0;
542 const double tmp0_19 = 0.62200846792814621559/h1;
543 const double tmp0_5 = -0.78867513459481288225/h0;
544 const double tmp0_6 = 0.78867513459481288225/h0;
545 const double tmp0_23 = -0.16666666666666666667/h1;
546 const double tmp0_7 = 0.21132486540518711775/h0;
547 #pragma omp parallel for
548 for (index_t k2=0; k2 < m_NE2; ++k2) {
549 for (index_t k0=0; k0 < m_NE0; ++k0) {
550 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
551 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
552 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
553 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
554 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
555 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
556 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
557 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
558 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
559 for (index_t i=0; i < numComp; ++i) {
560 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
561 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_13 + f_101[i]*tmp0_10 + f_111[i]*tmp0_11 + tmp0_12*(f_011[i] + f_110[i]) + tmp0_9*(f_001[i] + f_100[i]);
562 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
563 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
564 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_15 + f_010[i]*tmp0_21 + f_011[i]*tmp0_20 + f_100[i]*tmp0_17 + f_101[i]*tmp0_16 + f_110[i]*tmp0_19 + f_111[i]*tmp0_18;
565 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
566 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
567 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_17 + f_010[i]*tmp0_21 + f_011[i]*tmp0_19 + f_100[i]*tmp0_15 + f_101[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18;
568 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
569 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
570 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_22 + f_010[i]*tmp0_27 + f_101[i]*tmp0_24 + f_111[i]*tmp0_25 + tmp0_23*(f_001[i] + f_100[i]) + tmp0_26*(f_011[i] + f_110[i]);
571 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
572 } /* end of component loop i */
573 } /* end of k0 loop */
574 } /* end of k2 loop */
575 } /* end of face 2 */
576 if (m_faceOffset[3] > -1) {
577 const double tmp0_22 = 0.16666666666666666667/h1;
578 const double tmp0_16 = 0.16666666666666666667/h1;
579 const double tmp0_33 = -0.78867513459481288225/h2;
580 const double tmp0_0 = -0.21132486540518711775/h0;
581 const double tmp0_21 = -0.62200846792814621559/h1;
582 const double tmp0_17 = 0.16666666666666666667/h1;
583 const double tmp0_1 = 0.78867513459481288225/h0;
584 const double tmp0_20 = -0.16666666666666666667/h1;
585 const double tmp0_14 = 0.044658198738520451079/h1;
586 const double tmp0_2 = -0.78867513459481288225/h0;
587 const double tmp0_27 = -0.62200846792814621559/h1;
588 const double tmp0_15 = 0.62200846792814621559/h1;
589 const double tmp0_3 = 0.21132486540518711775/h0;
590 const double tmp0_26 = -0.16666666666666666667/h1;
591 const double tmp0_12 = -0.16666666666666666667/h1;
592 const double tmp0_25 = -0.044658198738520451079/h1;
593 const double tmp0_13 = -0.044658198738520451079/h1;
594 const double tmp0_24 = 0.62200846792814621559/h1;
595 const double tmp0_10 = 0.044658198738520451079/h1;
596 const double tmp0_11 = -0.62200846792814621559/h1;
597 const double tmp0_34 = -0.21132486540518711775/h2;
598 const double tmp0_35 = 0.78867513459481288225/h2;
599 const double tmp0_8 = 0.16666666666666666667/h1;
600 const double tmp0_29 = -0.21132486540518711775/h2;
601 const double tmp0_9 = 0.62200846792814621559/h1;
602 const double tmp0_30 = -0.78867513459481288225/h2;
603 const double tmp0_28 = 0.78867513459481288225/h2;
604 const double tmp0_32 = 0.21132486540518711775/h2;
605 const double tmp0_31 = 0.21132486540518711775/h2;
606 const double tmp0_18 = -0.16666666666666666667/h1;
607 const double tmp0_4 = -0.78867513459481288225/h0;
608 const double tmp0_19 = -0.044658198738520451079/h1;
609 const double tmp0_5 = 0.21132486540518711775/h0;
610 const double tmp0_6 = -0.21132486540518711775/h0;
611 const double tmp0_23 = 0.044658198738520451079/h1;
612 const double tmp0_7 = 0.78867513459481288225/h0;
613 #pragma omp parallel for
614 for (index_t k2=0; k2 < m_NE2; ++k2) {
615 for (index_t k0=0; k0 < m_NE0; ++k0) {
616 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
617 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
618 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
619 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
620 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
621 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
622 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
623 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
624 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
625 for (index_t i=0; i < numComp; ++i) {
626 o[INDEX3(i,0,0,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
627 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_11 + f_010[i]*tmp0_9 + f_101[i]*tmp0_13 + f_111[i]*tmp0_10 + tmp0_12*(f_001[i] + f_100[i]) + tmp0_8*(f_011[i] + f_110[i]);
628 o[INDEX3(i,2,0,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
629 o[INDEX3(i,0,1,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
630 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_19 + f_010[i]*tmp0_16 + f_011[i]*tmp0_14 + f_100[i]*tmp0_21 + f_101[i]*tmp0_20 + f_110[i]*tmp0_15 + f_111[i]*tmp0_17;
631 o[INDEX3(i,2,1,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
632 o[INDEX3(i,0,2,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
633 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_21 + f_010[i]*tmp0_16 + f_011[i]*tmp0_15 + f_100[i]*tmp0_19 + f_101[i]*tmp0_20 + f_110[i]*tmp0_14 + f_111[i]*tmp0_17;
634 o[INDEX3(i,2,2,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
635 o[INDEX3(i,0,3,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
636 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_25 + f_010[i]*tmp0_23 + f_101[i]*tmp0_27 + f_111[i]*tmp0_24 + tmp0_22*(f_011[i] + f_110[i]) + tmp0_26*(f_001[i] + f_100[i]);
637 o[INDEX3(i,2,3,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
638 } /* end of component loop i */
639 } /* end of k0 loop */
640 } /* end of k2 loop */
641 } /* end of face 3 */
642 if (m_faceOffset[4] > -1) {
643 const double tmp0_22 = -0.16666666666666666667/h2;
644 const double tmp0_16 = -0.62200846792814621559/h2;
645 const double tmp0_33 = 0.044658198738520451079/h2;
646 const double tmp0_0 = -0.78867513459481288225/h0;
647 const double tmp0_21 = 0.044658198738520451079/h2;
648 const double tmp0_17 = -0.16666666666666666667/h2;
649 const double tmp0_1 = 0.78867513459481288225/h0;
650 const double tmp0_20 = 0.16666666666666666667/h2;
651 const double tmp0_14 = 0.78867513459481288225/h1;
652 const double tmp0_2 = 0.21132486540518711775/h0;
653 const double tmp0_27 = 0.62200846792814621559/h2;
654 const double tmp0_15 = 0.21132486540518711775/h1;
655 const double tmp0_3 = -0.21132486540518711775/h0;
656 const double tmp0_26 = 0.16666666666666666667/h2;
657 const double tmp0_12 = -0.21132486540518711775/h1;
658 const double tmp0_25 = -0.044658198738520451079/h2;
659 const double tmp0_13 = -0.78867513459481288225/h1;
660 const double tmp0_24 = -0.16666666666666666667/h2;
661 const double tmp0_10 = 0.21132486540518711775/h1;
662 const double tmp0_11 = 0.78867513459481288225/h1;
663 const double tmp0_34 = 0.16666666666666666667/h2;
664 const double tmp0_35 = 0.62200846792814621559/h2;
665 const double tmp0_8 = -0.78867513459481288225/h1;
666 const double tmp0_29 = 0.16666666666666666667/h2;
667 const double tmp0_9 = -0.21132486540518711775/h1;
668 const double tmp0_30 = -0.044658198738520451079/h2;
669 const double tmp0_28 = 0.044658198738520451079/h2;
670 const double tmp0_32 = -0.62200846792814621559/h2;
671 const double tmp0_31 = -0.16666666666666666667/h2;
672 const double tmp0_18 = -0.044658198738520451079/h2;
673 const double tmp0_4 = -0.21132486540518711775/h0;
674 const double tmp0_19 = 0.62200846792814621559/h2;
675 const double tmp0_5 = 0.21132486540518711775/h0;
676 const double tmp0_6 = 0.78867513459481288225/h0;
677 const double tmp0_23 = -0.62200846792814621559/h2;
678 const double tmp0_7 = -0.78867513459481288225/h0;
679 #pragma omp parallel for
680 for (index_t k1=0; k1 < m_NE1; ++k1) {
681 for (index_t k0=0; k0 < m_NE0; ++k0) {
682 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
683 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
684 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
685 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
686 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
687 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
688 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
689 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
690 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
691 for (index_t i=0; i < numComp; ++i) {
692 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
693 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
694 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_16 + f_001[i]*tmp0_19 + f_110[i]*tmp0_18 + f_111[i]*tmp0_21 + tmp0_17*(f_010[i] + f_100[i]) + tmp0_20*(f_011[i] + f_101[i]);
695 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
696 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
697 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_25 + f_011[i]*tmp0_28 + f_100[i]*tmp0_23 + f_101[i]*tmp0_27 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
698 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
699 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
700 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_23 + f_011[i]*tmp0_27 + f_100[i]*tmp0_25 + f_101[i]*tmp0_28 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
701 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
702 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
703 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_30 + f_001[i]*tmp0_33 + f_110[i]*tmp0_32 + f_111[i]*tmp0_35 + tmp0_31*(f_010[i] + f_100[i]) + tmp0_34*(f_011[i] + f_101[i]);
704 } /* end of component loop i */
705 } /* end of k0 loop */
706 } /* end of k1 loop */
707 } /* end of face 4 */
708 if (m_faceOffset[5] > -1) {
709 const double tmp0_22 = 0.16666666666666666667/h2;
710 const double tmp0_16 = 0.62200846792814621559/h2;
711 const double tmp0_33 = -0.044658198738520451079/h2;
712 const double tmp0_0 = -0.78867513459481288225/h0;
713 const double tmp0_21 = -0.16666666666666666667/h2;
714 const double tmp0_17 = 0.16666666666666666667/h2;
715 const double tmp0_1 = 0.78867513459481288225/h0;
716 const double tmp0_20 = -0.044658198738520451079/h2;
717 const double tmp0_14 = 0.21132486540518711775/h1;
718 const double tmp0_2 = -0.21132486540518711775/h0;
719 const double tmp0_27 = -0.16666666666666666667/h2;
720 const double tmp0_15 = 0.78867513459481288225/h1;
721 const double tmp0_3 = 0.21132486540518711775/h0;
722 const double tmp0_26 = -0.16666666666666666667/h2;
723 const double tmp0_12 = -0.21132486540518711775/h1;
724 const double tmp0_25 = 0.16666666666666666667/h2;
725 const double tmp0_13 = -0.78867513459481288225/h1;
726 const double tmp0_24 = 0.044658198738520451079/h2;
727 const double tmp0_10 = 0.78867513459481288225/h1;
728 const double tmp0_11 = 0.21132486540518711775/h1;
729 const double tmp0_34 = -0.62200846792814621559/h2;
730 const double tmp0_35 = -0.16666666666666666667/h2;
731 const double tmp0_8 = -0.78867513459481288225/h1;
732 const double tmp0_29 = -0.62200846792814621559/h2;
733 const double tmp0_9 = -0.21132486540518711775/h1;
734 const double tmp0_30 = 0.044658198738520451079/h2;
735 const double tmp0_28 = -0.044658198738520451079/h2;
736 const double tmp0_32 = 0.62200846792814621559/h2;
737 const double tmp0_31 = 0.16666666666666666667/h2;
738 const double tmp0_18 = 0.044658198738520451079/h2;
739 const double tmp0_4 = -0.21132486540518711775/h0;
740 const double tmp0_19 = -0.62200846792814621559/h2;
741 const double tmp0_5 = 0.21132486540518711775/h0;
742 const double tmp0_6 = -0.78867513459481288225/h0;
743 const double tmp0_23 = 0.62200846792814621559/h2;
744 const double tmp0_7 = 0.78867513459481288225/h0;
745 #pragma omp parallel for
746 for (index_t k1=0; k1 < m_NE1; ++k1) {
747 for (index_t k0=0; k0 < m_NE0; ++k0) {
748 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
749 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
750 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
751 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
752 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
753 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
754 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
755 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
756 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
757 for (index_t i=0; i < numComp; ++i) {
758 o[INDEX3(i,0,0,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
759 o[INDEX3(i,1,0,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
760 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_19 + f_001[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18 + tmp0_17*(f_011[i] + f_101[i]) + tmp0_21*(f_010[i] + f_100[i]);
761 o[INDEX3(i,0,1,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
762 o[INDEX3(i,1,1,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
763 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_28 + f_011[i]*tmp0_24 + f_100[i]*tmp0_29 + f_101[i]*tmp0_23 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
764 o[INDEX3(i,0,2,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
765 o[INDEX3(i,1,2,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
766 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_29 + f_011[i]*tmp0_23 + f_100[i]*tmp0_28 + f_101[i]*tmp0_24 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
767 o[INDEX3(i,0,3,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
768 o[INDEX3(i,1,3,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
769 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_33 + f_001[i]*tmp0_30 + f_110[i]*tmp0_34 + f_111[i]*tmp0_32 + tmp0_31*(f_011[i] + f_101[i]) + tmp0_35*(f_010[i] + f_100[i]);
770 } /* end of component loop i */
771 } /* end of k0 loop */
772 } /* end of k1 loop */
773 } /* end of face 5 */
774 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
775 } else {
776 stringstream msg;
777 msg << "setToGradient() not implemented for "
778 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
779 throw RipleyException(msg.str());
780 }
781 }
782
783 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
784 bool reducedColOrder) const
785 {
786 if (reducedRowOrder || reducedColOrder)
787 throw RipleyException("getPattern() not implemented for reduced order");
788
789 throw RipleyException("getPattern() not implemented");
790 }
791
792 void Brick::Print_Mesh_Info(const bool full) const
793 {
794 RipleyDomain::Print_Mesh_Info(full);
795 if (full) {
796 cout << " Id Coordinates" << endl;
797 cout.precision(15);
798 cout.setf(ios::scientific, ios::floatfield);
799 pair<double,double> xdx = getFirstCoordAndSpacing(0);
800 pair<double,double> ydy = getFirstCoordAndSpacing(1);
801 pair<double,double> zdz = getFirstCoordAndSpacing(2);
802 for (index_t i=0; i < getNumNodes(); i++) {
803 cout << " " << setw(5) << m_nodeId[i]
804 << " " << xdx.first+(i%m_N0)*xdx.second
805 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
806 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
807 }
808 }
809 }
810
811 IndexVector Brick::getNumNodesPerDim() const
812 {
813 IndexVector ret;
814 ret.push_back(m_N0);
815 ret.push_back(m_N1);
816 ret.push_back(m_N2);
817 return ret;
818 }
819
820 IndexVector Brick::getNumElementsPerDim() const
821 {
822 IndexVector ret;
823 ret.push_back(m_NE0);
824 ret.push_back(m_NE1);
825 ret.push_back(m_NE2);
826 return ret;
827 }
828
829 IndexVector Brick::getNumFacesPerBoundary() const
830 {
831 IndexVector ret(6, 0);
832 //left
833 if (m_offset0==0)
834 ret[0]=m_NE1*m_NE2;
835 //right
836 if (m_mpiInfo->rank%m_NX==m_NX-1)
837 ret[1]=m_NE1*m_NE2;
838 //bottom
839 if (m_offset1==0)
840 ret[2]=m_NE0*m_NE2;
841 //top
842 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
843 ret[3]=m_NE0*m_NE2;
844 //front
845 if (m_offset2==0)
846 ret[4]=m_NE0*m_NE1;
847 //back
848 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
849 ret[5]=m_NE0*m_NE1;
850 return ret;
851 }
852
853 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
854 {
855 if (dim==0)
856 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
857 else if (dim==1)
858 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
859 else if (dim==2)
860 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
861
862 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
863 }
864
865
866 //protected
867 dim_t Brick::getNumFaceElements() const
868 {
869 dim_t n=0;
870 //left
871 if (m_offset0==0)
872 n+=m_NE1*m_NE2;
873 //right
874 if (m_mpiInfo->rank%m_NX==m_NX-1)
875 n+=m_NE1*m_NE2;
876 //bottom
877 if (m_offset1==0)
878 n+=m_NE0*m_NE2;
879 //top
880 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
881 n+=m_NE0*m_NE2;
882 //front
883 if (m_offset2==0)
884 n+=m_NE0*m_NE1;
885 //back
886 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
887 n+=m_NE0*m_NE1;
888
889 return n;
890 }
891
892 //protected
893 void Brick::assembleCoordinates(escript::Data& arg) const
894 {
895 escriptDataC x = arg.getDataC();
896 int numDim = m_numDim;
897 if (!isDataPointShapeEqual(&x, 1, &numDim))
898 throw RipleyException("setToX: Invalid Data object shape");
899 if (!numSamplesEqual(&x, 1, getNumNodes()))
900 throw RipleyException("setToX: Illegal number of samples in Data object");
901
902 pair<double,double> xdx = getFirstCoordAndSpacing(0);
903 pair<double,double> ydy = getFirstCoordAndSpacing(1);
904 pair<double,double> zdz = getFirstCoordAndSpacing(2);
905 arg.requireWrite();
906 #pragma omp parallel for
907 for (dim_t i2 = 0; i2 < m_N2; i2++) {
908 for (dim_t i1 = 0; i1 < m_N1; i1++) {
909 for (dim_t i0 = 0; i0 < m_N0; i0++) {
910 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
911 point[0] = xdx.first+i0*xdx.second;
912 point[1] = ydy.first+i1*ydy.second;
913 point[2] = zdz.first+i2*zdz.second;
914 }
915 }
916 }
917 }
918
919 //private
920 void Brick::populateSampleIds()
921 {
922 // identifiers are ordered from left to right, bottom to top, front to back
923 // on each rank, except for the shared nodes which are owned by the rank
924 // below / to the left / to the front of the current rank
925
926 // build node distribution vector first.
927 // m_nodeDistribution[i] is the first node id on rank i, that is
928 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
929 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
930 m_nodeDistribution[1]=getNumNodes();
931 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
932 const index_t x = k%m_NX;
933 const index_t y = k%(m_NX*m_NY)/m_NX;
934 const index_t z = k/(m_NX*m_NY);
935 index_t numNodes=getNumNodes();
936 if (x>0)
937 numNodes-=m_N1*m_N2;
938 if (y>0)
939 numNodes-=m_N0*m_N2;
940 if (z>0)
941 numNodes-=m_N0*m_N1;
942 // if an edge was subtracted twice add it back
943 if (x>0 && y>0)
944 numNodes+=m_N2;
945 if (x>0 && z>0)
946 numNodes+=m_N1;
947 if (y>0 && z>0)
948 numNodes+=m_N0;
949 // the corner node was removed 3x and added back 3x, so subtract it
950 if (x>0 && y>0 && z>0)
951 numNodes--;
952 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
953 }
954 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
955
956 m_nodeId.resize(getNumNodes());
957
958 // the bottom, left and front planes are not owned by this rank so the
959 // identifiers need to be computed accordingly
960 const index_t left = (m_offset0==0 ? 0 : 1);
961 const index_t bottom = (m_offset1==0 ? 0 : 1);
962 const index_t front = (m_offset2==0 ? 0 : 1);
963
964 // case 1: all nodes on left plane are owned by rank on the left
965 if (left>0) {
966 const int neighbour=m_mpiInfo->rank-1;
967 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
968 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
969 #pragma omp parallel for
970 for (dim_t i2=front; i2<m_N2; i2++) {
971 for (dim_t i1=bottom; i1<m_N1; i1++) {
972 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
973 + (i1-bottom+1)*leftN0
974 + (i2-front)*leftN0*leftN1 - 1;
975 }
976 }
977 }
978 // case 2: all nodes on bottom plane are owned by rank below
979 if (bottom>0) {
980 const int neighbour=m_mpiInfo->rank-m_NX;
981 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
982 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
983 #pragma omp parallel for
984 for (dim_t i2=front; i2<m_N2; i2++) {
985 for (dim_t i0=left; i0<m_N0; i0++) {
986 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
987 + bottomN0*(bottomN1-1)
988 + (i2-front)*bottomN0*bottomN1 + i0-left;
989 }
990 }
991 }
992 // case 3: all nodes on front plane are owned by rank in front
993 if (front>0) {
994 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
995 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
996 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
997 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
998 #pragma omp parallel for
999 for (dim_t i1=bottom; i1<m_N1; i1++) {
1000 for (dim_t i0=left; i0<m_N0; i0++) {
1001 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1002 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1003 }
1004 }
1005 }
1006 // case 4: nodes on front bottom edge are owned by the corresponding rank
1007 if (front>0 && bottom>0) {
1008 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1009 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1010 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1011 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1012 #pragma omp parallel for
1013 for (dim_t i0=left; i0<m_N0; i0++) {
1014 m_nodeId[i0]=m_nodeDistribution[neighbour]
1015 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1016 }
1017 }
1018 // case 5: nodes on left bottom edge are owned by the corresponding rank
1019 if (left>0 && bottom>0) {
1020 const int neighbour=m_mpiInfo->rank-m_NX-1;
1021 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1022 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1023 #pragma omp parallel for
1024 for (dim_t i2=front; i2<m_N2; i2++) {
1025 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1026 + (1+i2-front)*N0*N1-1;
1027 }
1028 }
1029 // case 6: nodes on left front edge are owned by the corresponding rank
1030 if (left>0 && front>0) {
1031 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1032 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1033 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1034 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1035 #pragma omp parallel for
1036 for (dim_t i1=bottom; i1<m_N1; i1++) {
1037 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1038 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1039 }
1040 }
1041 // case 7: bottom-left-front corner node owned by corresponding rank
1042 if (left>0 && bottom>0 && front>0) {
1043 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1044 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1045 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1046 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1047 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1048 }
1049
1050 // the rest of the id's are contiguous
1051 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1052 #pragma omp parallel for
1053 for (dim_t i2=front; i2<m_N2; i2++) {
1054 for (dim_t i1=bottom; i1<m_N1; i1++) {
1055 for (dim_t i0=left; i0<m_N0; i0++) {
1056 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1057 +(i1-bottom)*(m_N0-left)
1058 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1059 }
1060 }
1061 }
1062
1063 // elements
1064 m_elementId.resize(getNumElements());
1065 #pragma omp parallel for
1066 for (dim_t k=0; k<getNumElements(); k++) {
1067 m_elementId[k]=k;
1068 }
1069
1070 // face elements
1071 m_faceId.resize(getNumFaceElements());
1072 #pragma omp parallel for
1073 for (dim_t k=0; k<getNumFaceElements(); k++) {
1074 m_faceId[k]=k;
1075 }
1076
1077 // generate face offset vector
1078 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1079 m_faceOffset.assign(facesPerEdge.size(), -1);
1080 index_t offset=0;
1081 for (size_t i=0; i<facesPerEdge.size(); i++) {
1082 if (facesPerEdge[i]>0) {
1083 m_faceOffset[i]=offset;
1084 offset+=facesPerEdge[i];
1085 }
1086 }
1087 }
1088
1089 //protected
1090 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in) const
1091 {
1092 const dim_t numComp = in.getDataPointSize();
1093 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1094 const double tmp0_3 = 0.0094373878376559314545;
1095 const double tmp0_2 = 0.035220810900864519624;
1096 const double tmp0_1 = 0.13144585576580214704;
1097 const double tmp0_0 = 0.49056261216234406855;
1098 #pragma omp parallel for
1099 for (index_t k2=0; k2 < m_NE2; ++k2) {
1100 for (index_t k1=0; k1 < m_NE1; ++k1) {
1101 for (index_t k0=0; k0 < m_NE0; ++k0) {
1102 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1103 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1104 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1105 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1106 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1107 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1108 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1109 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1110 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1111 for (index_t i=0; i < numComp; ++i) {
1112 o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_100[i]) + tmp0_2*(f_011[i] + f_101[i] + f_110[i]);
1113 o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_3 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_111[i]);
1114 o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_101[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_110[i]) + tmp0_2*(f_001[i] + f_100[i] + f_111[i]);
1115 o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_3 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_101[i]);
1116 o[INDEX2(i,numComp,4)] = f_001[i]*tmp0_0 + f_110[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_101[i]) + tmp0_2*(f_010[i] + f_100[i] + f_111[i]);
1117 o[INDEX2(i,numComp,5)] = f_010[i]*tmp0_3 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_110[i]);
1118 o[INDEX2(i,numComp,6)] = f_011[i]*tmp0_0 + f_100[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_111[i]) + tmp0_2*(f_000[i] + f_101[i] + f_110[i]);
1119 o[INDEX2(i,numComp,7)] = f_000[i]*tmp0_3 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_100[i]);
1120 } /* end of component loop i */
1121 } /* end of k0 loop */
1122 } /* end of k1 loop */
1123 } /* end of k2 loop */
1124 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1125 }
1126
1127 //protected
1128 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in) const
1129 {
1130 const dim_t numComp = in.getDataPointSize();
1131 /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1132 if (m_faceOffset[0] > -1) {
1133 const double tmp0_2 = 0.044658198738520451079;
1134 const double tmp0_1 = 0.16666666666666666667;
1135 const double tmp0_0 = 0.62200846792814621559;
1136 #pragma omp parallel for
1137 for (index_t k2=0; k2 < m_NE2; ++k2) {
1138 for (index_t k1=0; k1 < m_NE1; ++k1) {
1139 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1140 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1141 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1142 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1143 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1144 for (index_t i=0; i < numComp; ++i) {
1145 o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_2 + tmp0_1*(f_001[i] + f_010[i]);
1146 o[INDEX2(i,numComp,1)] = f_001[i]*tmp0_2 + f_010[i]*tmp0_0 + tmp0_1*(f_000[i] + f_011[i]);
1147 o[INDEX2(i,numComp,2)] = f_001[i]*tmp0_0 + f_010[i]*tmp0_2 + tmp0_1*(f_000[i] + f_011[i]);
1148 o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_011[i]*tmp0_0 + tmp0_1*(f_001[i] + f_010[i]);
1149 } /* end of component loop i */
1150 } /* end of k1 loop */
1151 } /* end of k2 loop */
1152 } /* end of face 0 */
1153 if (m_faceOffset[1] > -1) {
1154 const double tmp0_2 = 0.044658198738520451079;
1155 const double tmp0_1 = 0.62200846792814621559;
1156 const double tmp0_0 = 0.16666666666666666667;
1157 #pragma omp parallel for
1158 for (index_t k2=0; k2 < m_NE2; ++k2) {
1159 for (index_t k1=0; k1 < m_NE1; ++k1) {
1160 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1161 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1162 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1163 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1164 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1165 for (index_t i=0; i < numComp; ++i) {
1166 o[INDEX2(i,numComp,0)] = f_100[i]*tmp0_1 + f_111[i]*tmp0_2 + tmp0_0*(f_101[i] + f_110[i]);
1167 o[INDEX2(i,numComp,1)] = f_101[i]*tmp0_2 + f_110[i]*tmp0_1 + tmp0_0*(f_100[i] + f_111[i]);
1168 o[INDEX2(i,numComp,2)] = f_101[i]*tmp0_1 + f_110[i]*tmp0_2 + tmp0_0*(f_100[i] + f_111[i]);
1169 o[INDEX2(i,numComp,3)] = f_100[i]*tmp0_2 + f_111[i]*tmp0_1 + tmp0_0*(f_101[i] + f_110[i]);
1170 } /* end of component loop i */
1171 } /* end of k1 loop */
1172 } /* end of k2 loop */
1173 } /* end of face 1 */
1174 if (m_faceOffset[2] > -1) {
1175 const double tmp0_2 = 0.044658198738520451079;
1176 const double tmp0_1 = 0.16666666666666666667;
1177 const double tmp0_0 = 0.62200846792814621559;
1178 #pragma omp parallel for
1179 for (index_t k2=0; k2 < m_NE2; ++k2) {
1180 for (index_t k0=0; k0 < m_NE0; ++k0) {
1181 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1182 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1183 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1184 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1185 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1186 for (index_t i=0; i < numComp; ++i) {
1187 o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_101[i]*tmp0_2 + tmp0_1*(f_001[i] + f_100[i]);
1188 o[INDEX2(i,numComp,1)] = f_001[i]*tmp0_2 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i]);
1189 o[INDEX2(i,numComp,2)] = f_001[i]*tmp0_0 + f_100[i]*tmp0_2 + tmp0_1*(f_000[i] + f_101[i]);
1190 o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i]);
1191 } /* end of component loop i */
1192 } /* end of k0 loop */
1193 } /* end of k2 loop */
1194 } /* end of face 2 */
1195 if (m_faceOffset[3] > -1) {
1196 const double tmp0_2 = 0.044658198738520451079;
1197 const double tmp0_1 = 0.62200846792814621559;
1198 const double tmp0_0 = 0.16666666666666666667;
1199 #pragma omp parallel for
1200 for (index_t k2=0; k2 < m_NE2; ++k2) {
1201 for (index_t k0=0; k0 < m_NE0; ++k0) {
1202 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1203 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1204 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1205 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1206 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1207 for (index_t i=0; i < numComp; ++i) {
1208 o[INDEX2(i,numComp,0)] = f_010[i]*tmp0_1 + f_111[i]*tmp0_2 + tmp0_0*(f_011[i] + f_110[i]);
1209 o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_2 + f_110[i]*tmp0_1 + tmp0_0*(f_010[i] + f_111[i]);
1210 o[INDEX2(i,numComp,2)] = f_011[i]*tmp0_1 + f_110[i]*tmp0_2 + tmp0_0*(f_010[i] + f_111[i]);
1211 o[INDEX2(i,numComp,3)] = f_010[i]*tmp0_2 + f_111[i]*tmp0_1 + tmp0_0*(f_011[i] + f_110[i]);
1212 } /* end of component loop i */
1213 } /* end of k0 loop */
1214 } /* end of k2 loop */
1215 } /* end of face 3 */
1216 if (m_faceOffset[4] > -1) {
1217 const double tmp0_2 = 0.044658198738520451079;
1218 const double tmp0_1 = 0.16666666666666666667;
1219 const double tmp0_0 = 0.62200846792814621559;
1220 #pragma omp parallel for
1221 for (index_t k1=0; k1 < m_NE1; ++k1) {
1222 for (index_t k0=0; k0 < m_NE0; ++k0) {
1223 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1224 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1225 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1226 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1227 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1228 for (index_t i=0; i < numComp; ++i) {
1229 o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_110[i]*tmp0_2 + tmp0_1*(f_010[i] + f_100[i]);
1230 o[INDEX2(i,numComp,1)] = f_010[i]*tmp0_2 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_110[i]);
1231 o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_100[i]*tmp0_2 + tmp0_1*(f_000[i] + f_110[i]);
1232 o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i]);
1233 } /* end of component loop i */
1234 } /* end of k0 loop */
1235 } /* end of k1 loop */
1236 } /* end of face 4 */
1237 if (m_faceOffset[5] > -1) {
1238 const double tmp0_2 = 0.044658198738520451079;
1239 const double tmp0_1 = 0.16666666666666666667;
1240 const double tmp0_0 = 0.62200846792814621559;
1241 #pragma omp parallel for
1242 for (index_t k1=0; k1 < m_NE1; ++k1) {
1243 for (index_t k0=0; k0 < m_NE0; ++k0) {
1244 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1245 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1246 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1247 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1248 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1249 for (index_t i=0; i < numComp; ++i) {
1250 o[INDEX2(i,numComp,0)] = f_001[i]*tmp0_0 + f_111[i]*tmp0_2 + tmp0_1*(f_011[i] + f_101[i]);
1251 o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_2 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_111[i]);
1252 o[INDEX2(i,numComp,2)] = f_011[i]*tmp0_0 + f_101[i]*tmp0_2 + tmp0_1*(f_001[i] + f_111[i]);
1253 o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_2 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i]);
1254 } /* end of component loop i */
1255 } /* end of k0 loop */
1256 } /* end of k1 loop */
1257 } /* end of face 5 */
1258 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1259 }
1260
1261 } // end of namespace ripley
1262

  ViewVC Help
Powered by ViewVC 1.1.26