/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3756 - (show annotations)
Fri Jan 6 02:35:19 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 377442 byte(s)
Fixed interpolation from DOF to nodes and moved common code to the base class.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 createPattern();
85 }
86
87
88 Brick::~Brick()
89 {
90 }
91
92 string Brick::getDescription() const
93 {
94 return "ripley::Brick";
95 }
96
97 bool Brick::operator==(const AbstractDomain& other) const
98 {
99 const Brick* o=dynamic_cast<const Brick*>(&other);
100 if (o) {
101 return (RipleyDomain::operator==(other) &&
102 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105 }
106
107 return false;
108 }
109
110 void Brick::dump(const string& fileName) const
111 {
112 #if USE_SILO
113 string fn(fileName);
114 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115 fn+=".silo";
116 }
117
118 const int NUM_SILO_FILES = 1;
119 const char* blockDirFmt = "/block%04d";
120 int driver=DB_HDF5;
121 string siloPath;
122 DBfile* dbfile = NULL;
123
124 #ifdef ESYS_MPI
125 PMPIO_baton_t* baton = NULL;
126 #endif
127
128 if (m_mpiInfo->size > 1) {
129 #ifdef ESYS_MPI
130 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132 PMPIO_DefaultClose, (void*)&driver);
133 // try the fallback driver in case of error
134 if (!baton && driver != DB_PDB) {
135 driver = DB_PDB;
136 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138 PMPIO_DefaultClose, (void*)&driver);
139 }
140 if (baton) {
141 char str[64];
142 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143 siloPath = str;
144 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145 }
146 #endif
147 } else {
148 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149 getDescription().c_str(), driver);
150 // try the fallback driver in case of error
151 if (!dbfile && driver != DB_PDB) {
152 driver = DB_PDB;
153 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154 getDescription().c_str(), driver);
155 }
156 }
157
158 if (!dbfile)
159 throw RipleyException("dump: Could not create Silo file");
160
161 /*
162 if (driver==DB_HDF5) {
163 // gzip level 1 already provides good compression with minimal
164 // performance penalty. Some tests showed that gzip levels >3 performed
165 // rather badly on escript data both in terms of time and space
166 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167 }
168 */
169
170 boost::scoped_ptr<double> x(new double[m_N0]);
171 boost::scoped_ptr<double> y(new double[m_N1]);
172 boost::scoped_ptr<double> z(new double[m_N2]);
173 double* coords[3] = { x.get(), y.get(), z.get() };
174 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175 pair<double,double> ydy = getFirstCoordAndSpacing(1);
176 pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 #pragma omp parallel
178 {
179 #pragma omp for
180 for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 coords[0][i0]=xdx.first+i0*xdx.second;
182 }
183 #pragma omp for
184 for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 coords[1][i1]=ydy.first+i1*ydy.second;
186 }
187 #pragma omp for
188 for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 coords[2][i2]=zdz.first+i2*zdz.second;
190 }
191 }
192 IndexVector dims = getNumNodesPerDim();
193 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 DB_COLLINEAR, NULL);
195
196 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197 NULL, 0, DB_INT, DB_NODECENT, NULL);
198
199 // write element ids
200 dims = getNumElementsPerDim();
201 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203
204 // rank 0 writes multimesh and multivar
205 if (m_mpiInfo->rank == 0) {
206 vector<string> tempstrings;
207 vector<char*> names;
208 for (dim_t i=0; i<m_mpiInfo->size; i++) {
209 stringstream path;
210 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211 tempstrings.push_back(path.str());
212 names.push_back((char*)tempstrings.back().c_str());
213 }
214 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215 DBSetDir(dbfile, "/");
216 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217 &types[0], NULL);
218 tempstrings.clear();
219 names.clear();
220 for (dim_t i=0; i<m_mpiInfo->size; i++) {
221 stringstream path;
222 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223 tempstrings.push_back(path.str());
224 names.push_back((char*)tempstrings.back().c_str());
225 }
226 types.assign(m_mpiInfo->size, DB_QUADVAR);
227 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228 &types[0], NULL);
229 tempstrings.clear();
230 names.clear();
231 for (dim_t i=0; i<m_mpiInfo->size; i++) {
232 stringstream path;
233 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234 tempstrings.push_back(path.str());
235 names.push_back((char*)tempstrings.back().c_str());
236 }
237 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238 &types[0], NULL);
239 }
240
241 if (m_mpiInfo->size > 1) {
242 #ifdef ESYS_MPI
243 PMPIO_HandOffBaton(baton, dbfile);
244 PMPIO_Finish(baton);
245 #endif
246 } else {
247 DBClose(dbfile);
248 }
249
250 #else // USE_SILO
251 throw RipleyException("dump(): no Silo support");
252 #endif
253 }
254
255 const int* Brick::borrowSampleReferenceIDs(int fsType) const
256 {
257 switch (fsType) {
258 case Nodes:
259 case ReducedNodes: //FIXME: reduced
260 return &m_nodeId[0];
261 case DegreesOfFreedom: //FIXME
262 case ReducedDegreesOfFreedom: //FIXME
263 return &m_dofId[0];
264 case Elements:
265 case ReducedElements:
266 return &m_elementId[0];
267 case ReducedFaceElements:
268 return &m_faceId[0];
269 default:
270 break;
271 }
272
273 stringstream msg;
274 msg << "borrowSampleReferenceIDs() not implemented for function space type "
275 << fsType;
276 throw RipleyException(msg.str());
277 }
278
279 bool Brick::ownSample(int fsCode, index_t id) const
280 {
281 #ifdef ESYS_MPI
282 if (fsCode == Nodes) {
283 return (m_dofMap[id] < getNumDOF());
284 } else {
285 stringstream msg;
286 msg << "ownSample() not implemented for "
287 << functionSpaceTypeAsString(fsCode);
288 throw RipleyException(msg.str());
289 }
290 #else
291 return true;
292 #endif
293 }
294
295 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
296 {
297 escript::Data& in = *const_cast<escript::Data*>(&cIn);
298 const dim_t numComp = in.getDataPointSize();
299 const double h0 = m_l0/m_gNE0;
300 const double h1 = m_l1/m_gNE1;
301 const double h2 = m_l1/m_gNE2;
302 const double C0 = .044658198738520451079;
303 const double C1 = .16666666666666666667;
304 const double C2 = .21132486540518711775;
305 const double C3 = .25;
306 const double C4 = .5;
307 const double C5 = .62200846792814621559;
308 const double C6 = .78867513459481288225;
309
310 if (out.getFunctionSpace().getTypeCode() == Elements) {
311 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
312 #pragma omp parallel for
313 for (index_t k2=0; k2 < m_NE2; ++k2) {
314 for (index_t k1=0; k1 < m_NE1; ++k1) {
315 for (index_t k0=0; k0 < m_NE0; ++k0) {
316 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
317 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
318 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
319 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
320 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
321 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
322 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
323 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
324 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
325 for (index_t i=0; i < numComp; ++i) {
326 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
327 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
328 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
329 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
330 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
331 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
332 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
333 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
334 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
335 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
336 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
337 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
338 o[INDEX3(i,0,0,numComp,3)] = V0;
339 o[INDEX3(i,1,0,numComp,3)] = V4;
340 o[INDEX3(i,2,0,numComp,3)] = V8;
341 o[INDEX3(i,0,1,numComp,3)] = V0;
342 o[INDEX3(i,1,1,numComp,3)] = V5;
343 o[INDEX3(i,2,1,numComp,3)] = V9;
344 o[INDEX3(i,0,2,numComp,3)] = V1;
345 o[INDEX3(i,1,2,numComp,3)] = V4;
346 o[INDEX3(i,2,2,numComp,3)] = V10;
347 o[INDEX3(i,0,3,numComp,3)] = V1;
348 o[INDEX3(i,1,3,numComp,3)] = V5;
349 o[INDEX3(i,2,3,numComp,3)] = V11;
350 o[INDEX3(i,0,4,numComp,3)] = V2;
351 o[INDEX3(i,1,4,numComp,3)] = V6;
352 o[INDEX3(i,2,4,numComp,3)] = V8;
353 o[INDEX3(i,0,5,numComp,3)] = V2;
354 o[INDEX3(i,1,5,numComp,3)] = V7;
355 o[INDEX3(i,2,5,numComp,3)] = V9;
356 o[INDEX3(i,0,6,numComp,3)] = V3;
357 o[INDEX3(i,1,6,numComp,3)] = V6;
358 o[INDEX3(i,2,6,numComp,3)] = V10;
359 o[INDEX3(i,0,7,numComp,3)] = V3;
360 o[INDEX3(i,1,7,numComp,3)] = V7;
361 o[INDEX3(i,2,7,numComp,3)] = V11;
362 } /* end of component loop i */
363 } /* end of k0 loop */
364 } /* end of k1 loop */
365 } /* end of k2 loop */
366 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
367 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
368 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
369 #pragma omp parallel for
370 for (index_t k2=0; k2 < m_NE2; ++k2) {
371 for (index_t k1=0; k1 < m_NE1; ++k1) {
372 for (index_t k0=0; k0 < m_NE0; ++k0) {
373 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
374 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
375 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
376 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
377 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
378 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
379 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
380 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
381 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
382 for (index_t i=0; i < numComp; ++i) {
383 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
384 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
385 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
386 } /* end of component loop i */
387 } /* end of k0 loop */
388 } /* end of k1 loop */
389 } /* end of k2 loop */
390 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
391 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
392 /*** GENERATOR SNIP_GRAD_FACES TOP */
393 #pragma omp parallel
394 {
395 if (m_faceOffset[0] > -1) {
396 #pragma omp for nowait
397 for (index_t k2=0; k2 < m_NE2; ++k2) {
398 for (index_t k1=0; k1 < m_NE1; ++k1) {
399 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
400 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
401 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
402 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
403 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
404 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
405 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
406 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
407 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
408 for (index_t i=0; i < numComp; ++i) {
409 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
410 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
411 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
412 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
413 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
414 o[INDEX3(i,1,0,numComp,3)] = V0;
415 o[INDEX3(i,2,0,numComp,3)] = V2;
416 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
417 o[INDEX3(i,1,1,numComp,3)] = V0;
418 o[INDEX3(i,2,1,numComp,3)] = V3;
419 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
420 o[INDEX3(i,1,2,numComp,3)] = V1;
421 o[INDEX3(i,2,2,numComp,3)] = V2;
422 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423 o[INDEX3(i,1,3,numComp,3)] = V1;
424 o[INDEX3(i,2,3,numComp,3)] = V3;
425 } /* end of component loop i */
426 } /* end of k1 loop */
427 } /* end of k2 loop */
428 } /* end of face 0 */
429 if (m_faceOffset[1] > -1) {
430 #pragma omp for nowait
431 for (index_t k2=0; k2 < m_NE2; ++k2) {
432 for (index_t k1=0; k1 < m_NE1; ++k1) {
433 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
434 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
435 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
436 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
437 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
438 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
439 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
440 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
441 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
442 for (index_t i=0; i < numComp; ++i) {
443 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
444 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
445 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
446 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
447 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
448 o[INDEX3(i,1,0,numComp,3)] = V0;
449 o[INDEX3(i,2,0,numComp,3)] = V2;
450 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
451 o[INDEX3(i,1,1,numComp,3)] = V0;
452 o[INDEX3(i,2,1,numComp,3)] = V3;
453 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
454 o[INDEX3(i,1,2,numComp,3)] = V1;
455 o[INDEX3(i,2,2,numComp,3)] = V2;
456 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457 o[INDEX3(i,1,3,numComp,3)] = V1;
458 o[INDEX3(i,2,3,numComp,3)] = V3;
459 } /* end of component loop i */
460 } /* end of k1 loop */
461 } /* end of k2 loop */
462 } /* end of face 1 */
463 if (m_faceOffset[2] > -1) {
464 #pragma omp for nowait
465 for (index_t k2=0; k2 < m_NE2; ++k2) {
466 for (index_t k0=0; k0 < m_NE0; ++k0) {
467 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
468 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
469 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
470 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
471 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
472 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
473 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
474 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
475 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
476 for (index_t i=0; i < numComp; ++i) {
477 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
478 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
479 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
480 o[INDEX3(i,0,0,numComp,3)] = V0;
481 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
482 o[INDEX3(i,2,0,numComp,3)] = V1;
483 o[INDEX3(i,0,1,numComp,3)] = V0;
484 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
485 o[INDEX3(i,2,1,numComp,3)] = V2;
486 o[INDEX3(i,0,2,numComp,3)] = V0;
487 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
488 o[INDEX3(i,2,2,numComp,3)] = V1;
489 o[INDEX3(i,0,3,numComp,3)] = V0;
490 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491 o[INDEX3(i,2,3,numComp,3)] = V2;
492 } /* end of component loop i */
493 } /* end of k0 loop */
494 } /* end of k2 loop */
495 } /* end of face 2 */
496 if (m_faceOffset[3] > -1) {
497 #pragma omp for nowait
498 for (index_t k2=0; k2 < m_NE2; ++k2) {
499 for (index_t k0=0; k0 < m_NE0; ++k0) {
500 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
501 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
502 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
503 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
504 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
505 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
506 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
507 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
508 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
509 for (index_t i=0; i < numComp; ++i) {
510 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
511 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
512 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
513 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
514 o[INDEX3(i,0,0,numComp,3)] = V0;
515 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
516 o[INDEX3(i,2,0,numComp,3)] = V2;
517 o[INDEX3(i,0,1,numComp,3)] = V0;
518 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
519 o[INDEX3(i,2,1,numComp,3)] = V3;
520 o[INDEX3(i,0,2,numComp,3)] = V1;
521 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
522 o[INDEX3(i,2,2,numComp,3)] = V2;
523 o[INDEX3(i,0,3,numComp,3)] = V1;
524 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525 o[INDEX3(i,2,3,numComp,3)] = V3;
526 } /* end of component loop i */
527 } /* end of k0 loop */
528 } /* end of k2 loop */
529 } /* end of face 3 */
530 if (m_faceOffset[4] > -1) {
531 #pragma omp for nowait
532 for (index_t k1=0; k1 < m_NE1; ++k1) {
533 for (index_t k0=0; k0 < m_NE0; ++k0) {
534 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
535 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
536 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
537 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
538 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
539 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
540 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
541 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
542 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
543 for (index_t i=0; i < numComp; ++i) {
544 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
545 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
546 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
547 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
548 o[INDEX3(i,0,0,numComp,3)] = V0;
549 o[INDEX3(i,1,0,numComp,3)] = V2;
550 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
551 o[INDEX3(i,0,1,numComp,3)] = V0;
552 o[INDEX3(i,1,1,numComp,3)] = V3;
553 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
554 o[INDEX3(i,0,2,numComp,3)] = V1;
555 o[INDEX3(i,1,2,numComp,3)] = V2;
556 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
557 o[INDEX3(i,0,3,numComp,3)] = V1;
558 o[INDEX3(i,1,3,numComp,3)] = V3;
559 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 } /* end of component loop i */
561 } /* end of k0 loop */
562 } /* end of k1 loop */
563 } /* end of face 4 */
564 if (m_faceOffset[5] > -1) {
565 #pragma omp for nowait
566 for (index_t k1=0; k1 < m_NE1; ++k1) {
567 for (index_t k0=0; k0 < m_NE0; ++k0) {
568 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
569 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
570 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
571 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
572 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
573 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
574 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
575 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
576 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
577 for (index_t i=0; i < numComp; ++i) {
578 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
579 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
580 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
581 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
582 o[INDEX3(i,0,0,numComp,3)] = V0;
583 o[INDEX3(i,1,0,numComp,3)] = V2;
584 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
585 o[INDEX3(i,0,1,numComp,3)] = V0;
586 o[INDEX3(i,1,1,numComp,3)] = V3;
587 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
588 o[INDEX3(i,0,2,numComp,3)] = V1;
589 o[INDEX3(i,1,2,numComp,3)] = V2;
590 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
591 o[INDEX3(i,0,3,numComp,3)] = V1;
592 o[INDEX3(i,1,3,numComp,3)] = V3;
593 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 } /* end of component loop i */
595 } /* end of k0 loop */
596 } /* end of k1 loop */
597 } /* end of face 5 */
598 } // end of parallel section
599 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
600 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
601 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
602 #pragma omp parallel
603 {
604 if (m_faceOffset[0] > -1) {
605 #pragma omp for nowait
606 for (index_t k2=0; k2 < m_NE2; ++k2) {
607 for (index_t k1=0; k1 < m_NE1; ++k1) {
608 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
609 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
610 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
611 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
612 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
613 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
614 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
615 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
616 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
617 for (index_t i=0; i < numComp; ++i) {
618 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
619 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
620 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
621 } /* end of component loop i */
622 } /* end of k1 loop */
623 } /* end of k2 loop */
624 } /* end of face 0 */
625 if (m_faceOffset[1] > -1) {
626 #pragma omp for nowait
627 for (index_t k2=0; k2 < m_NE2; ++k2) {
628 for (index_t k1=0; k1 < m_NE1; ++k1) {
629 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
630 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
631 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
632 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
633 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
634 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
635 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
636 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
637 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
638 for (index_t i=0; i < numComp; ++i) {
639 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
640 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
641 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
642 } /* end of component loop i */
643 } /* end of k1 loop */
644 } /* end of k2 loop */
645 } /* end of face 1 */
646 if (m_faceOffset[2] > -1) {
647 #pragma omp for nowait
648 for (index_t k2=0; k2 < m_NE2; ++k2) {
649 for (index_t k0=0; k0 < m_NE0; ++k0) {
650 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
651 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
652 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
653 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
654 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
655 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
656 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
657 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
658 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
659 for (index_t i=0; i < numComp; ++i) {
660 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
661 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
662 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
663 } /* end of component loop i */
664 } /* end of k0 loop */
665 } /* end of k2 loop */
666 } /* end of face 2 */
667 if (m_faceOffset[3] > -1) {
668 #pragma omp for nowait
669 for (index_t k2=0; k2 < m_NE2; ++k2) {
670 for (index_t k0=0; k0 < m_NE0; ++k0) {
671 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
672 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
673 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
674 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
675 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
676 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
677 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
678 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
679 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
680 for (index_t i=0; i < numComp; ++i) {
681 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
682 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
683 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
684 } /* end of component loop i */
685 } /* end of k0 loop */
686 } /* end of k2 loop */
687 } /* end of face 3 */
688 if (m_faceOffset[4] > -1) {
689 #pragma omp for nowait
690 for (index_t k1=0; k1 < m_NE1; ++k1) {
691 for (index_t k0=0; k0 < m_NE0; ++k0) {
692 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
693 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
694 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
695 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
696 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
697 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
698 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
699 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
700 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
701 for (index_t i=0; i < numComp; ++i) {
702 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
703 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
704 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
705 } /* end of component loop i */
706 } /* end of k0 loop */
707 } /* end of k1 loop */
708 } /* end of face 4 */
709 if (m_faceOffset[5] > -1) {
710 #pragma omp for nowait
711 for (index_t k1=0; k1 < m_NE1; ++k1) {
712 for (index_t k0=0; k0 < m_NE0; ++k0) {
713 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
714 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
715 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
716 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
717 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
718 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
719 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
720 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
721 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
722 for (index_t i=0; i < numComp; ++i) {
723 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
724 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
725 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
726 } /* end of component loop i */
727 } /* end of k0 loop */
728 } /* end of k1 loop */
729 } /* end of face 5 */
730 } // end of parallel section
731 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
732 } else {
733 stringstream msg;
734 msg << "setToGradient() not implemented for "
735 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
736 throw RipleyException(msg.str());
737 }
738 }
739
740 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
741 {
742 escript::Data& in = *const_cast<escript::Data*>(&arg);
743 const dim_t numComp = in.getDataPointSize();
744 const double h0 = m_l0/m_gNE0;
745 const double h1 = m_l1/m_gNE1;
746 const double h2 = m_l2/m_gNE2;
747 if (arg.getFunctionSpace().getTypeCode() == Elements) {
748 const double w_0 = h0*h1*h2/8.;
749 #pragma omp parallel
750 {
751 vector<double> int_local(numComp, 0);
752 #pragma omp for nowait
753 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
754 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
755 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
756 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
757 for (index_t i=0; i < numComp; ++i) {
758 const register double f_0 = f[INDEX2(i,0,numComp)];
759 const register double f_1 = f[INDEX2(i,1,numComp)];
760 const register double f_2 = f[INDEX2(i,2,numComp)];
761 const register double f_3 = f[INDEX2(i,3,numComp)];
762 const register double f_4 = f[INDEX2(i,4,numComp)];
763 const register double f_5 = f[INDEX2(i,5,numComp)];
764 const register double f_6 = f[INDEX2(i,6,numComp)];
765 const register double f_7 = f[INDEX2(i,7,numComp)];
766 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
767 } /* end of component loop i */
768 } /* end of k0 loop */
769 } /* end of k1 loop */
770 } /* end of k2 loop */
771
772 #pragma omp critical
773 for (index_t i=0; i<numComp; i++)
774 integrals[i]+=int_local[i];
775 } // end of parallel section
776 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
777 const double w_0 = h0*h1*h2;
778 #pragma omp parallel
779 {
780 vector<double> int_local(numComp, 0);
781 #pragma omp for nowait
782 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
783 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
784 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
785 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
786 for (index_t i=0; i < numComp; ++i) {
787 int_local[i]+=f[i]*w_0;
788 } /* end of component loop i */
789 } /* end of k0 loop */
790 } /* end of k1 loop */
791 } /* end of k2 loop */
792
793 #pragma omp critical
794 for (index_t i=0; i<numComp; i++)
795 integrals[i]+=int_local[i];
796 } // end of parallel section
797 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
798 const double w_0 = h1*h2/4.;
799 const double w_1 = h0*h2/4.;
800 const double w_2 = h0*h1/4.;
801 #pragma omp parallel
802 {
803 vector<double> int_local(numComp, 0);
804 if (m_faceOffset[0] > -1) {
805 #pragma omp for nowait
806 for (index_t k2=0; k2 < m_NE2; ++k2) {
807 for (index_t k1=0; k1 < m_NE1; ++k1) {
808 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
809 for (index_t i=0; i < numComp; ++i) {
810 const register double f_0 = f[INDEX2(i,0,numComp)];
811 const register double f_1 = f[INDEX2(i,1,numComp)];
812 const register double f_2 = f[INDEX2(i,2,numComp)];
813 const register double f_3 = f[INDEX2(i,3,numComp)];
814 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
815 } /* end of component loop i */
816 } /* end of k1 loop */
817 } /* end of k2 loop */
818 }
819
820 if (m_faceOffset[1] > -1) {
821 #pragma omp for nowait
822 for (index_t k2=0; k2 < m_NE2; ++k2) {
823 for (index_t k1=0; k1 < m_NE1; ++k1) {
824 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
825 for (index_t i=0; i < numComp; ++i) {
826 const register double f_0 = f[INDEX2(i,0,numComp)];
827 const register double f_1 = f[INDEX2(i,1,numComp)];
828 const register double f_2 = f[INDEX2(i,2,numComp)];
829 const register double f_3 = f[INDEX2(i,3,numComp)];
830 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
831 } /* end of component loop i */
832 } /* end of k1 loop */
833 } /* end of k2 loop */
834 }
835
836 if (m_faceOffset[2] > -1) {
837 #pragma omp for nowait
838 for (index_t k2=0; k2 < m_NE2; ++k2) {
839 for (index_t k0=0; k0 < m_NE0; ++k0) {
840 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
841 for (index_t i=0; i < numComp; ++i) {
842 const register double f_0 = f[INDEX2(i,0,numComp)];
843 const register double f_1 = f[INDEX2(i,1,numComp)];
844 const register double f_2 = f[INDEX2(i,2,numComp)];
845 const register double f_3 = f[INDEX2(i,3,numComp)];
846 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
847 } /* end of component loop i */
848 } /* end of k1 loop */
849 } /* end of k2 loop */
850 }
851
852 if (m_faceOffset[3] > -1) {
853 #pragma omp for nowait
854 for (index_t k2=0; k2 < m_NE2; ++k2) {
855 for (index_t k0=0; k0 < m_NE0; ++k0) {
856 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
857 for (index_t i=0; i < numComp; ++i) {
858 const register double f_0 = f[INDEX2(i,0,numComp)];
859 const register double f_1 = f[INDEX2(i,1,numComp)];
860 const register double f_2 = f[INDEX2(i,2,numComp)];
861 const register double f_3 = f[INDEX2(i,3,numComp)];
862 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
863 } /* end of component loop i */
864 } /* end of k1 loop */
865 } /* end of k2 loop */
866 }
867
868 if (m_faceOffset[4] > -1) {
869 #pragma omp for nowait
870 for (index_t k1=0; k1 < m_NE1; ++k1) {
871 for (index_t k0=0; k0 < m_NE0; ++k0) {
872 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
873 for (index_t i=0; i < numComp; ++i) {
874 const register double f_0 = f[INDEX2(i,0,numComp)];
875 const register double f_1 = f[INDEX2(i,1,numComp)];
876 const register double f_2 = f[INDEX2(i,2,numComp)];
877 const register double f_3 = f[INDEX2(i,3,numComp)];
878 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
879 } /* end of component loop i */
880 } /* end of k1 loop */
881 } /* end of k2 loop */
882 }
883
884 if (m_faceOffset[5] > -1) {
885 #pragma omp for nowait
886 for (index_t k1=0; k1 < m_NE1; ++k1) {
887 for (index_t k0=0; k0 < m_NE0; ++k0) {
888 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
889 for (index_t i=0; i < numComp; ++i) {
890 const register double f_0 = f[INDEX2(i,0,numComp)];
891 const register double f_1 = f[INDEX2(i,1,numComp)];
892 const register double f_2 = f[INDEX2(i,2,numComp)];
893 const register double f_3 = f[INDEX2(i,3,numComp)];
894 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
895 } /* end of component loop i */
896 } /* end of k1 loop */
897 } /* end of k2 loop */
898 }
899
900 #pragma omp critical
901 for (index_t i=0; i<numComp; i++)
902 integrals[i]+=int_local[i];
903 } // end of parallel section
904
905 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
906 const double w_0 = h1*h2;
907 const double w_1 = h0*h2;
908 const double w_2 = h0*h1;
909 #pragma omp parallel
910 {
911 vector<double> int_local(numComp, 0);
912 if (m_faceOffset[0] > -1) {
913 #pragma omp for nowait
914 for (index_t k2=0; k2 < m_NE2; ++k2) {
915 for (index_t k1=0; k1 < m_NE1; ++k1) {
916 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
917 for (index_t i=0; i < numComp; ++i) {
918 int_local[i]+=f[i]*w_0;
919 } /* end of component loop i */
920 } /* end of k1 loop */
921 } /* end of k2 loop */
922 }
923
924 if (m_faceOffset[1] > -1) {
925 #pragma omp for nowait
926 for (index_t k2=0; k2 < m_NE2; ++k2) {
927 for (index_t k1=0; k1 < m_NE1; ++k1) {
928 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
929 for (index_t i=0; i < numComp; ++i) {
930 int_local[i]+=f[i]*w_0;
931 } /* end of component loop i */
932 } /* end of k1 loop */
933 } /* end of k2 loop */
934 }
935
936 if (m_faceOffset[2] > -1) {
937 #pragma omp for nowait
938 for (index_t k2=0; k2 < m_NE2; ++k2) {
939 for (index_t k0=0; k0 < m_NE0; ++k0) {
940 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
941 for (index_t i=0; i < numComp; ++i) {
942 int_local[i]+=f[i]*w_1;
943 } /* end of component loop i */
944 } /* end of k1 loop */
945 } /* end of k2 loop */
946 }
947
948 if (m_faceOffset[3] > -1) {
949 #pragma omp for nowait
950 for (index_t k2=0; k2 < m_NE2; ++k2) {
951 for (index_t k0=0; k0 < m_NE0; ++k0) {
952 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
953 for (index_t i=0; i < numComp; ++i) {
954 int_local[i]+=f[i]*w_1;
955 } /* end of component loop i */
956 } /* end of k1 loop */
957 } /* end of k2 loop */
958 }
959
960 if (m_faceOffset[4] > -1) {
961 #pragma omp for nowait
962 for (index_t k1=0; k1 < m_NE1; ++k1) {
963 for (index_t k0=0; k0 < m_NE0; ++k0) {
964 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
965 for (index_t i=0; i < numComp; ++i) {
966 int_local[i]+=f[i]*w_2;
967 } /* end of component loop i */
968 } /* end of k1 loop */
969 } /* end of k2 loop */
970 }
971
972 if (m_faceOffset[5] > -1) {
973 #pragma omp for nowait
974 for (index_t k1=0; k1 < m_NE1; ++k1) {
975 for (index_t k0=0; k0 < m_NE0; ++k0) {
976 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
977 for (index_t i=0; i < numComp; ++i) {
978 int_local[i]+=f[i]*w_2;
979 } /* end of component loop i */
980 } /* end of k1 loop */
981 } /* end of k2 loop */
982 }
983
984 #pragma omp critical
985 for (index_t i=0; i<numComp; i++)
986 integrals[i]+=int_local[i];
987 } // end of parallel section
988
989 } else {
990 stringstream msg;
991 msg << "setToIntegrals() not implemented for "
992 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
993 throw RipleyException(msg.str());
994 }
995 }
996
997 void Brick::setToNormal(escript::Data& out) const
998 {
999 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1000 #pragma omp parallel
1001 {
1002 if (m_faceOffset[0] > -1) {
1003 #pragma omp for nowait
1004 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1005 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1006 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1007 // set vector at four quadrature points
1008 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1009 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1010 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1011 *o++ = -1.; *o++ = 0.; *o = 0.;
1012 }
1013 }
1014 }
1015
1016 if (m_faceOffset[1] > -1) {
1017 #pragma omp for nowait
1018 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1019 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1020 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1021 // set vector at four quadrature points
1022 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1023 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1024 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1025 *o++ = 1.; *o++ = 0.; *o = 0.;
1026 }
1027 }
1028 }
1029
1030 if (m_faceOffset[2] > -1) {
1031 #pragma omp for nowait
1032 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1033 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1034 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1035 // set vector at four quadrature points
1036 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1037 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1038 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1039 *o++ = 0.; *o++ = -1.; *o = 0.;
1040 }
1041 }
1042 }
1043
1044 if (m_faceOffset[3] > -1) {
1045 #pragma omp for nowait
1046 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1047 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1048 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1049 // set vector at four quadrature points
1050 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1051 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1052 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1053 *o++ = 0.; *o++ = 1.; *o = 0.;
1054 }
1055 }
1056 }
1057
1058 if (m_faceOffset[4] > -1) {
1059 #pragma omp for nowait
1060 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1061 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1062 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1063 // set vector at four quadrature points
1064 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1065 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1066 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1067 *o++ = 0.; *o++ = 0.; *o = -1.;
1068 }
1069 }
1070 }
1071
1072 if (m_faceOffset[5] > -1) {
1073 #pragma omp for nowait
1074 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1075 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1076 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1077 // set vector at four quadrature points
1078 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1079 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1080 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1081 *o++ = 0.; *o++ = 0.; *o = 1.;
1082 }
1083 }
1084 }
1085 } // end of parallel section
1086 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1087 #pragma omp parallel
1088 {
1089 if (m_faceOffset[0] > -1) {
1090 #pragma omp for nowait
1091 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1092 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1093 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1094 *o++ = -1.;
1095 *o++ = 0.;
1096 *o = 0.;
1097 }
1098 }
1099 }
1100
1101 if (m_faceOffset[1] > -1) {
1102 #pragma omp for nowait
1103 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1104 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1105 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1106 *o++ = 1.;
1107 *o++ = 0.;
1108 *o = 0.;
1109 }
1110 }
1111 }
1112
1113 if (m_faceOffset[2] > -1) {
1114 #pragma omp for nowait
1115 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1116 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1117 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1118 *o++ = 0.;
1119 *o++ = -1.;
1120 *o = 0.;
1121 }
1122 }
1123 }
1124
1125 if (m_faceOffset[3] > -1) {
1126 #pragma omp for nowait
1127 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1128 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1129 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1130 *o++ = 0.;
1131 *o++ = 1.;
1132 *o = 0.;
1133 }
1134 }
1135 }
1136
1137 if (m_faceOffset[4] > -1) {
1138 #pragma omp for nowait
1139 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1140 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1141 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1142 *o++ = 0.;
1143 *o++ = 0.;
1144 *o = -1.;
1145 }
1146 }
1147 }
1148
1149 if (m_faceOffset[5] > -1) {
1150 #pragma omp for nowait
1151 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1152 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1153 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1154 *o++ = 0.;
1155 *o++ = 0.;
1156 *o = 1.;
1157 }
1158 }
1159 }
1160 } // end of parallel section
1161
1162 } else {
1163 stringstream msg;
1164 msg << "setToNormal() not implemented for "
1165 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1166 throw RipleyException(msg.str());
1167 }
1168 }
1169
1170 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1171 bool reducedColOrder) const
1172 {
1173 if (reducedRowOrder || reducedColOrder)
1174 throw RipleyException("getPattern() not implemented for reduced order");
1175
1176 return m_pattern;
1177 }
1178
1179 void Brick::Print_Mesh_Info(const bool full) const
1180 {
1181 RipleyDomain::Print_Mesh_Info(full);
1182 if (full) {
1183 cout << " Id Coordinates" << endl;
1184 cout.precision(15);
1185 cout.setf(ios::scientific, ios::floatfield);
1186 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1187 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1188 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1189 for (index_t i=0; i < getNumNodes(); i++) {
1190 cout << " " << setw(5) << m_nodeId[i]
1191 << " " << xdx.first+(i%m_N0)*xdx.second
1192 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1193 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1194 }
1195 }
1196 }
1197
1198 IndexVector Brick::getNumNodesPerDim() const
1199 {
1200 IndexVector ret;
1201 ret.push_back(m_N0);
1202 ret.push_back(m_N1);
1203 ret.push_back(m_N2);
1204 return ret;
1205 }
1206
1207 IndexVector Brick::getNumElementsPerDim() const
1208 {
1209 IndexVector ret;
1210 ret.push_back(m_NE0);
1211 ret.push_back(m_NE1);
1212 ret.push_back(m_NE2);
1213 return ret;
1214 }
1215
1216 IndexVector Brick::getNumFacesPerBoundary() const
1217 {
1218 IndexVector ret(6, 0);
1219 //left
1220 if (m_offset0==0)
1221 ret[0]=m_NE1*m_NE2;
1222 //right
1223 if (m_mpiInfo->rank%m_NX==m_NX-1)
1224 ret[1]=m_NE1*m_NE2;
1225 //bottom
1226 if (m_offset1==0)
1227 ret[2]=m_NE0*m_NE2;
1228 //top
1229 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1230 ret[3]=m_NE0*m_NE2;
1231 //front
1232 if (m_offset2==0)
1233 ret[4]=m_NE0*m_NE1;
1234 //back
1235 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1236 ret[5]=m_NE0*m_NE1;
1237 return ret;
1238 }
1239
1240 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1241 {
1242 if (dim==0)
1243 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1244 else if (dim==1)
1245 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1246 else if (dim==2)
1247 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1248
1249 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1250 }
1251
1252 //protected
1253 dim_t Brick::getNumDOF() const
1254 {
1255 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1256 }
1257
1258 //protected
1259 dim_t Brick::getNumFaceElements() const
1260 {
1261 const IndexVector faces = getNumFacesPerBoundary();
1262 dim_t n=0;
1263 for (size_t i=0; i<faces.size(); i++)
1264 n+=faces[i];
1265 return n;
1266 }
1267
1268 //protected
1269 void Brick::assembleCoordinates(escript::Data& arg) const
1270 {
1271 escriptDataC x = arg.getDataC();
1272 int numDim = m_numDim;
1273 if (!isDataPointShapeEqual(&x, 1, &numDim))
1274 throw RipleyException("setToX: Invalid Data object shape");
1275 if (!numSamplesEqual(&x, 1, getNumNodes()))
1276 throw RipleyException("setToX: Illegal number of samples in Data object");
1277
1278 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1279 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1280 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1281 arg.requireWrite();
1282 #pragma omp parallel for
1283 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1284 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1285 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1286 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1287 point[0] = xdx.first+i0*xdx.second;
1288 point[1] = ydy.first+i1*ydy.second;
1289 point[2] = zdz.first+i2*zdz.second;
1290 }
1291 }
1292 }
1293 }
1294
1295 //protected
1296 dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1297 {
1298 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1299 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1300 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1301 const int x=node%nDOF0;
1302 const int y=node%(nDOF0*nDOF1)/nDOF0;
1303 const int z=node/(nDOF0*nDOF1);
1304 int num=0;
1305 // loop through potential neighbours and add to index if positions are
1306 // within bounds
1307 for (int i2=-1; i2<2; i2++) {
1308 for (int i1=-1; i1<2; i1++) {
1309 for (int i0=-1; i0<2; i0++) {
1310 // skip node itself
1311 if (i0==0 && i1==0 && i2==0)
1312 continue;
1313 // location of neighbour node
1314 const int nx=x+i0;
1315 const int ny=y+i1;
1316 const int nz=z+i2;
1317 if (nx>=0 && ny>=0 && nz>=0
1318 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1319 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1320 num++;
1321 }
1322 }
1323 }
1324 }
1325
1326 return num;
1327 }
1328
1329 //protected
1330 void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1331 {
1332 const dim_t numComp = in.getDataPointSize();
1333 out.requireWrite();
1334
1335 const index_t left = (m_offset0==0 ? 0 : 1);
1336 const index_t bottom = (m_offset1==0 ? 0 : 1);
1337 const index_t front = (m_offset2==0 ? 0 : 1);
1338 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1339 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1340 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1341 #pragma omp parallel for
1342 for (index_t i=0; i<nDOF2; i++) {
1343 for (index_t j=0; j<nDOF1; j++) {
1344 for (index_t k=0; k<nDOF0; k++) {
1345 const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1346 const double* src=in.getSampleDataRO(n);
1347 copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1348 }
1349 }
1350 }
1351 }
1352
1353 //protected
1354 void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1355 {
1356 const dim_t numComp = in.getDataPointSize();
1357 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1358 in.requireWrite();
1359 Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1360
1361 const dim_t numDOF = getNumDOF();
1362 out.requireWrite();
1363 const double* buffer = Paso_Coupler_finishCollect(coupler);
1364
1365 #pragma omp parallel for
1366 for (index_t i=0; i<getNumNodes(); i++) {
1367 const double* src=(m_dofMap[i]<numDOF ?
1368 in.getSampleDataRO(m_dofMap[i])
1369 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1370 copy(src, src+numComp, out.getSampleDataRW(i));
1371 }
1372 }
1373
1374 //private
1375 void Brick::populateSampleIds()
1376 {
1377 // identifiers are ordered from left to right, bottom to top, front to back
1378 // globally
1379
1380 // build node distribution vector first.
1381 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1382 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1383 const dim_t numDOF=getNumDOF();
1384 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1385 m_nodeDistribution[k]=k*numDOF;
1386 }
1387 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1388 m_nodeId.resize(getNumNodes());
1389 m_dofId.resize(numDOF);
1390 m_elementId.resize(getNumElements());
1391 m_faceId.resize(getNumFaceElements());
1392
1393 #pragma omp parallel
1394 {
1395 #pragma omp for nowait
1396 // nodes
1397 for (dim_t i2=0; i2<m_N2; i2++) {
1398 for (dim_t i1=0; i1<m_N1; i1++) {
1399 for (dim_t i0=0; i0<m_N0; i0++) {
1400 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1401 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1402 +(m_offset1+i1)*(m_gNE0+1)
1403 +m_offset0+i0;
1404 }
1405 }
1406 }
1407
1408 // degrees of freedom
1409 #pragma omp for nowait
1410 for (dim_t k=0; k<numDOF; k++)
1411 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1412
1413 // elements
1414 #pragma omp for nowait
1415 for (dim_t i2=0; i2<m_NE2; i2++) {
1416 for (dim_t i1=0; i1<m_NE1; i1++) {
1417 for (dim_t i0=0; i0<m_NE0; i0++) {
1418 m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1419 (m_offset2+i2)*m_gNE0*m_gNE1
1420 +(m_offset1+i1)*m_gNE0
1421 +m_offset0+i0;
1422 }
1423 }
1424 }
1425
1426 // face elements
1427 #pragma omp for
1428 for (dim_t k=0; k<getNumFaceElements(); k++)
1429 m_faceId[k]=k;
1430 } // end parallel section
1431
1432 m_nodeTags.assign(getNumNodes(), 0);
1433 updateTagsInUse(Nodes);
1434
1435 m_elementTags.assign(getNumElements(), 0);
1436 updateTagsInUse(Elements);
1437
1438 // generate face offset vector and set face tags
1439 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1440 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1441 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1442 m_faceOffset.assign(facesPerEdge.size(), -1);
1443 m_faceTags.clear();
1444 index_t offset=0;
1445 for (size_t i=0; i<facesPerEdge.size(); i++) {
1446 if (facesPerEdge[i]>0) {
1447 m_faceOffset[i]=offset;
1448 offset+=facesPerEdge[i];
1449 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1450 }
1451 }
1452 setTagMap("left", LEFT);
1453 setTagMap("right", RIGHT);
1454 setTagMap("bottom", BOTTOM);
1455 setTagMap("top", TOP);
1456 setTagMap("front", FRONT);
1457 setTagMap("back", BACK);
1458 updateTagsInUse(FaceElements);
1459 }
1460
1461 //private
1462 void Brick::createPattern()
1463 {
1464 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1465 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1466 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1467 const index_t left = (m_offset0==0 ? 0 : 1);
1468 const index_t bottom = (m_offset1==0 ? 0 : 1);
1469 const index_t front = (m_offset2==0 ? 0 : 1);
1470
1471 // populate node->DOF mapping with own degrees of freedom.
1472 // The rest is assigned in the loop further down
1473 m_dofMap.assign(getNumNodes(), 0);
1474 #pragma omp parallel for
1475 for (index_t i=front; i<m_N2; i++) {
1476 for (index_t j=bottom; j<m_N1; j++) {
1477 for (index_t k=left; k<m_N0; k++) {
1478 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1479 }
1480 }
1481 }
1482
1483 // build list of shared components and neighbours by looping through
1484 // all potential neighbouring ranks and checking if positions are
1485 // within bounds
1486 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1487 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1488 RankVector neighbour;
1489 IndexVector offsetInShared(1,0);
1490 IndexVector sendShared, recvShared;
1491 int numShared=0;
1492 const int x=m_mpiInfo->rank%m_NX;
1493 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1494 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1495 for (int i2=-1; i2<2; i2++) {
1496 for (int i1=-1; i1<2; i1++) {
1497 for (int i0=-1; i0<2; i0++) {
1498 // skip this rank
1499 if (i0==0 && i1==0 && i2==0)
1500 continue;
1501 // location of neighbour rank
1502 const int nx=x+i0;
1503 const int ny=y+i1;
1504 const int nz=z+i2;
1505 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1506 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1507 if (i0==0 && i1==0) {
1508 // sharing front or back plane
1509 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1510 for (dim_t i=0; i<nDOF1; i++) {
1511 const int firstDOF=(i2==-1 ? i*nDOF0
1512 : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1513 const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1514 : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1515 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1516 sendShared.push_back(firstDOF+j);
1517 recvShared.push_back(numDOF+numShared);
1518 if (j>0) {
1519 if (i>0)
1520 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1521 colIndices[firstDOF+j-1].push_back(numShared);
1522 if (i<nDOF1-1)
1523 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1524 }
1525 if (i>0)
1526 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1527 colIndices[firstDOF+j].push_back(numShared);
1528 if (i<nDOF1-1)
1529 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1530 if (j<nDOF0-1) {
1531 if (i>0)
1532 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1533 colIndices[firstDOF+j+1].push_back(numShared);
1534 if (i<nDOF1-1)
1535 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1536 }
1537 m_dofMap[firstNode+j]=numDOF+numShared;
1538 }
1539 }
1540 } else if (i0==0 && i2==0) {
1541 // sharing top or bottom plane
1542 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1543 for (dim_t i=0; i<nDOF2; i++) {
1544 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1545 : nDOF0*((i+1)*nDOF1-1));
1546 const int firstNode=(i1==-1 ?
1547 left+(i+front)*m_N0*m_N1
1548 : left+m_N0*((i+1+front)*m_N1-1));
1549 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1550 sendShared.push_back(firstDOF+j);
1551 recvShared.push_back(numDOF+numShared);
1552 if (j>0) {
1553 if (i>0)
1554 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1555 colIndices[firstDOF+j-1].push_back(numShared);
1556 if (i<nDOF2-1)
1557 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1558 }
1559 if (i>0)
1560 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1561 colIndices[firstDOF+j].push_back(numShared);
1562 if (i<nDOF2-1)
1563 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1564 if (j<nDOF0-1) {
1565 if (i>0)
1566 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1567 colIndices[firstDOF+j+1].push_back(numShared);
1568 if (i<nDOF2-1)
1569 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1570 }
1571 m_dofMap[firstNode+j]=numDOF+numShared;
1572 }
1573 }
1574 } else if (i1==0 && i2==0) {
1575 // sharing left or right plane
1576 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1577 for (dim_t i=0; i<nDOF2; i++) {
1578 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1579 : nDOF0*(1+i*nDOF1)-1);
1580 const int firstNode=(i0==-1 ?
1581 (bottom+(i+front)*m_N1)*m_N0
1582 : (bottom+1+(i+front)*m_N1)*m_N0-1);
1583 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1584 sendShared.push_back(firstDOF+j*nDOF0);
1585 recvShared.push_back(numDOF+numShared);
1586 if (j>0) {
1587 if (i>0)
1588 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1589 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1590 if (i<nDOF2-1)
1591 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1592 }
1593 if (i>0)
1594 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1595 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1596 if (i<nDOF2-1)
1597 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1598 if (j<nDOF1-1) {
1599 if (i>0)
1600 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1601 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1602 if (i<nDOF2-1)
1603 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1604 }
1605 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1606 }
1607 }
1608 } else if (i0==0) {
1609 // sharing an edge in x direction
1610 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1611 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1612 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1613 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1614 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1615 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1616 sendShared.push_back(firstDOF+i);
1617 recvShared.push_back(numDOF+numShared);
1618 if (i>0)
1619 colIndices[firstDOF+i-1].push_back(numShared);
1620 colIndices[firstDOF+i].push_back(numShared);
1621 if (i<nDOF0-1)
1622 colIndices[firstDOF+i+1].push_back(numShared);
1623 m_dofMap[firstNode+i]=numDOF+numShared;
1624 }
1625 } else if (i1==0) {
1626 // sharing an edge in y direction
1627 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1628 const int firstDOF=(i0+1)/2*(nDOF0-1)
1629 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1630 const int firstNode=(i0+1)/2*(m_N0-1)
1631 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1632 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1633 sendShared.push_back(firstDOF+i*nDOF0);
1634 recvShared.push_back(numDOF+numShared);
1635 if (i>0)
1636 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1637 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1638 if (i<nDOF1-1)
1639 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1640 m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1641 }
1642 } else if (i2==0) {
1643 // sharing an edge in z direction
1644 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1645 const int firstDOF=(i0+1)/2*(nDOF0-1)
1646 +(i1+1)/2*nDOF0*(nDOF1-1);
1647 const int firstNode=(i0+1)/2*(m_N0-1)
1648 +(i1+1)/2*m_N0*(m_N1-1);
1649 for (dim_t i=0; i<nDOF2; i++, numShared++) {
1650 sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
1651 recvShared.push_back(numDOF+numShared);
1652 if (i>0)
1653 colIndices[firstDOF+(i-1)*nDOF0*nDOF1].push_back(numShared);
1654 colIndices[firstDOF+i*nDOF0*nDOF1].push_back(numShared);
1655 if (i<nDOF2-1)
1656 colIndices[firstDOF+(i+1)*nDOF0*nDOF1].push_back(numShared);
1657 m_dofMap[firstNode+i*m_N0*m_N1]=numDOF+numShared;
1658 }
1659 } else {
1660 // sharing a node
1661 const int dof=(i0+1)/2*(nDOF0-1)
1662 +(i1+1)/2*nDOF0*(nDOF1-1)
1663 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1664 const int node=(i0+1)/2*(m_N0-1)
1665 +(i1+1)/2*m_N0*(m_N1-1)
1666 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1667 offsetInShared.push_back(offsetInShared.back()+1);
1668 sendShared.push_back(dof);
1669 recvShared.push_back(numDOF+numShared);
1670 colIndices[dof].push_back(numShared);
1671 m_dofMap[node]=numDOF+numShared;
1672 ++numShared;
1673 }
1674 }
1675 }
1676 }
1677 }
1678
1679 // create connector
1680 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1681 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1682 &offsetInShared[0], 1, 0, m_mpiInfo);
1683 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1684 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1685 &offsetInShared[0], 1, 0, m_mpiInfo);
1686 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1687 Paso_SharedComponents_free(snd_shcomp);
1688 Paso_SharedComponents_free(rcv_shcomp);
1689
1690 // create main and couple blocks
1691 Paso_Pattern *mainPattern = createMainPattern();
1692 Paso_Pattern *colPattern, *rowPattern;
1693 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1694
1695 // allocate paso distribution
1696 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1697 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1698
1699 // finally create the system matrix
1700 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1701 distribution, distribution, mainPattern, colPattern, rowPattern,
1702 m_connector, m_connector);
1703
1704 Paso_Distribution_free(distribution);
1705
1706 // useful debug output
1707 /*
1708 cout << "--- rcv_shcomp ---" << endl;
1709 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1710 for (size_t i=0; i<neighbour.size(); i++) {
1711 cout << "neighbor[" << i << "]=" << neighbour[i]
1712 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1713 }
1714 for (size_t i=0; i<recvShared.size(); i++) {
1715 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1716 }
1717 cout << "--- snd_shcomp ---" << endl;
1718 for (size_t i=0; i<sendShared.size(); i++) {
1719 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1720 }
1721 cout << "--- dofMap ---" << endl;
1722 for (size_t i=0; i<m_dofMap.size(); i++) {
1723 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1724 }
1725 cout << "--- colIndices ---" << endl;
1726 for (size_t i=0; i<colIndices.size(); i++) {
1727 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1728 }
1729 */
1730
1731 /*
1732 cout << "--- main_pattern ---" << endl;
1733 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1734 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1735 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1736 }
1737 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1738 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1739 }
1740 */
1741
1742 /*
1743 cout << "--- colCouple_pattern ---" << endl;
1744 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1745 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1746 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1747 }
1748 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1749 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1750 }
1751 */
1752
1753 /*
1754 cout << "--- rowCouple_pattern ---" << endl;
1755 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1756 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1757 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1758 }
1759 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1760 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1761 }
1762 */
1763
1764 Paso_Pattern_free(mainPattern);
1765 Paso_Pattern_free(colPattern);
1766 Paso_Pattern_free(rowPattern);
1767 }
1768
1769 //protected
1770 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1771 bool reduced) const
1772 {
1773 const dim_t numComp = in.getDataPointSize();
1774 if (reduced) {
1775 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1776 const double c0 = .125;
1777 #pragma omp parallel for
1778 for (index_t k2=0; k2 < m_NE2; ++k2) {
1779 for (index_t k1=0; k1 < m_NE1; ++k1) {
1780 for (index_t k0=0; k0 < m_NE0; ++k0) {
1781 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1782 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1783 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1784 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1785 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1786 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1787 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1788 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1789 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1790 for (index_t i=0; i < numComp; ++i) {
1791 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1792 } /* end of component loop i */
1793 } /* end of k0 loop */
1794 } /* end of k1 loop */
1795 } /* end of k2 loop */
1796 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1797 } else {
1798 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1799 const double c0 = .0094373878376559314545;
1800 const double c1 = .035220810900864519624;
1801 const double c2 = .13144585576580214704;
1802 const double c3 = .49056261216234406855;
1803 #pragma omp parallel for
1804 for (index_t k2=0; k2 < m_NE2; ++k2) {
1805 for (index_t k1=0; k1 < m_NE1; ++k1) {
1806 for (index_t k0=0; k0 < m_NE0; ++k0) {
1807 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1808 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1809 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1810 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1811 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1812 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1813 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1814 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1815 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1816 for (index_t i=0; i < numComp; ++i) {
1817 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1818 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1819 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1820 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1821 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1822 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1823 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1824 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1825 } /* end of component loop i */
1826 } /* end of k0 loop */
1827 } /* end of k1 loop */
1828 } /* end of k2 loop */
1829 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1830 }
1831 }
1832
1833 //protected
1834 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1835 bool reduced) const
1836 {
1837 const dim_t numComp = in.getDataPointSize();
1838 if (reduced) {
1839 const double c0 = .25;
1840 #pragma omp parallel
1841 {
1842 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1843 if (m_faceOffset[0] > -1) {
1844 #pragma omp for nowait
1845 for (index_t k2=0; k2 < m_NE2; ++k2) {
1846 for (index_t k1=0; k1 < m_NE1; ++k1) {
1847 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1848 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1849 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1850 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1851 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1852 for (index_t i=0; i < numComp; ++i) {
1853 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1854 } /* end of component loop i */
1855 } /* end of k1 loop */
1856 } /* end of k2 loop */
1857 } /* end of face 0 */
1858 if (m_faceOffset[1] > -1) {
1859 #pragma omp for nowait
1860 for (index_t k2=0; k2 < m_NE2; ++k2) {
1861 for (index_t k1=0; k1 < m_NE1; ++k1) {
1862 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1863 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1864 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1865 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1866 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1867 for (index_t i=0; i < numComp; ++i) {
1868 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1869 } /* end of component loop i */
1870 } /* end of k1 loop */
1871 } /* end of k2 loop */
1872 } /* end of face 1 */
1873 if (m_faceOffset[2] > -1) {
1874 #pragma omp for nowait
1875 for (index_t k2=0; k2 < m_NE2; ++k2) {
1876 for (index_t k0=0; k0 < m_NE0; ++k0) {
1877 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1878 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1879 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1880 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1881 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1882 for (index_t i=0; i < numComp; ++i) {
1883 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1884 } /* end of component loop i */
1885 } /* end of k0 loop */
1886 } /* end of k2 loop */
1887 } /* end of face 2 */
1888