/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3757 - (show annotations)
Fri Jan 6 04:48:27 2012 UTC (7 years, 4 months ago) by caltinay
File size: 378071 byte(s)
Implemented ownSample() for DOF and Elements.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 createPattern();
85 }
86
87
88 Brick::~Brick()
89 {
90 }
91
92 string Brick::getDescription() const
93 {
94 return "ripley::Brick";
95 }
96
97 bool Brick::operator==(const AbstractDomain& other) const
98 {
99 const Brick* o=dynamic_cast<const Brick*>(&other);
100 if (o) {
101 return (RipleyDomain::operator==(other) &&
102 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105 }
106
107 return false;
108 }
109
110 void Brick::dump(const string& fileName) const
111 {
112 #if USE_SILO
113 string fn(fileName);
114 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115 fn+=".silo";
116 }
117
118 const int NUM_SILO_FILES = 1;
119 const char* blockDirFmt = "/block%04d";
120 int driver=DB_HDF5;
121 string siloPath;
122 DBfile* dbfile = NULL;
123
124 #ifdef ESYS_MPI
125 PMPIO_baton_t* baton = NULL;
126 #endif
127
128 if (m_mpiInfo->size > 1) {
129 #ifdef ESYS_MPI
130 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132 PMPIO_DefaultClose, (void*)&driver);
133 // try the fallback driver in case of error
134 if (!baton && driver != DB_PDB) {
135 driver = DB_PDB;
136 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138 PMPIO_DefaultClose, (void*)&driver);
139 }
140 if (baton) {
141 char str[64];
142 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143 siloPath = str;
144 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145 }
146 #endif
147 } else {
148 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149 getDescription().c_str(), driver);
150 // try the fallback driver in case of error
151 if (!dbfile && driver != DB_PDB) {
152 driver = DB_PDB;
153 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154 getDescription().c_str(), driver);
155 }
156 }
157
158 if (!dbfile)
159 throw RipleyException("dump: Could not create Silo file");
160
161 /*
162 if (driver==DB_HDF5) {
163 // gzip level 1 already provides good compression with minimal
164 // performance penalty. Some tests showed that gzip levels >3 performed
165 // rather badly on escript data both in terms of time and space
166 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167 }
168 */
169
170 boost::scoped_ptr<double> x(new double[m_N0]);
171 boost::scoped_ptr<double> y(new double[m_N1]);
172 boost::scoped_ptr<double> z(new double[m_N2]);
173 double* coords[3] = { x.get(), y.get(), z.get() };
174 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175 pair<double,double> ydy = getFirstCoordAndSpacing(1);
176 pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 #pragma omp parallel
178 {
179 #pragma omp for
180 for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 coords[0][i0]=xdx.first+i0*xdx.second;
182 }
183 #pragma omp for
184 for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 coords[1][i1]=ydy.first+i1*ydy.second;
186 }
187 #pragma omp for
188 for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 coords[2][i2]=zdz.first+i2*zdz.second;
190 }
191 }
192 IndexVector dims = getNumNodesPerDim();
193 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 DB_COLLINEAR, NULL);
195
196 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197 NULL, 0, DB_INT, DB_NODECENT, NULL);
198
199 // write element ids
200 dims = getNumElementsPerDim();
201 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203
204 // rank 0 writes multimesh and multivar
205 if (m_mpiInfo->rank == 0) {
206 vector<string> tempstrings;
207 vector<char*> names;
208 for (dim_t i=0; i<m_mpiInfo->size; i++) {
209 stringstream path;
210 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211 tempstrings.push_back(path.str());
212 names.push_back((char*)tempstrings.back().c_str());
213 }
214 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215 DBSetDir(dbfile, "/");
216 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217 &types[0], NULL);
218 tempstrings.clear();
219 names.clear();
220 for (dim_t i=0; i<m_mpiInfo->size; i++) {
221 stringstream path;
222 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223 tempstrings.push_back(path.str());
224 names.push_back((char*)tempstrings.back().c_str());
225 }
226 types.assign(m_mpiInfo->size, DB_QUADVAR);
227 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228 &types[0], NULL);
229 tempstrings.clear();
230 names.clear();
231 for (dim_t i=0; i<m_mpiInfo->size; i++) {
232 stringstream path;
233 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234 tempstrings.push_back(path.str());
235 names.push_back((char*)tempstrings.back().c_str());
236 }
237 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238 &types[0], NULL);
239 }
240
241 if (m_mpiInfo->size > 1) {
242 #ifdef ESYS_MPI
243 PMPIO_HandOffBaton(baton, dbfile);
244 PMPIO_Finish(baton);
245 #endif
246 } else {
247 DBClose(dbfile);
248 }
249
250 #else // USE_SILO
251 throw RipleyException("dump(): no Silo support");
252 #endif
253 }
254
255 const int* Brick::borrowSampleReferenceIDs(int fsType) const
256 {
257 switch (fsType) {
258 case Nodes:
259 case ReducedNodes: //FIXME: reduced
260 return &m_nodeId[0];
261 case DegreesOfFreedom:
262 case ReducedDegreesOfFreedom: //FIXME: reduced
263 return &m_dofId[0];
264 case Elements:
265 case ReducedElements:
266 return &m_elementId[0];
267 case FaceElements:
268 case ReducedFaceElements:
269 return &m_faceId[0];
270 default:
271 break;
272 }
273
274 stringstream msg;
275 msg << "borrowSampleReferenceIDs() not implemented for function space type "
276 << fsType;
277 throw RipleyException(msg.str());
278 }
279
280 bool Brick::ownSample(int fsType, index_t id) const
281 {
282 #ifdef ESYS_MPI
283 switch (fsType) {
284 case Nodes:
285 case ReducedNodes: //FIXME: reduced
286 return (m_dofMap[id] < getNumDOF());
287 case DegreesOfFreedom:
288 case ReducedDegreesOfFreedom:
289 return true;
290 case Elements:
291 case ReducedElements:
292 {
293 // check ownership of element's _last_ node
294 const index_t x=id%m_NE0 + 1;
295 const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
296 const index_t z=id/(m_NE0*m_NE1) + 1;
297 return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
298 }
299 case FaceElements:
300 case ReducedFaceElements:
301 default:
302 break;
303 }
304
305 stringstream msg;
306 msg << "ownSample() not implemented for "
307 << functionSpaceTypeAsString(fsType);
308 throw RipleyException(msg.str());
309 #else
310 return true;
311 #endif
312 }
313
314 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
315 {
316 escript::Data& in = *const_cast<escript::Data*>(&cIn);
317 const dim_t numComp = in.getDataPointSize();
318 const double h0 = m_l0/m_gNE0;
319 const double h1 = m_l1/m_gNE1;
320 const double h2 = m_l1/m_gNE2;
321 const double C0 = .044658198738520451079;
322 const double C1 = .16666666666666666667;
323 const double C2 = .21132486540518711775;
324 const double C3 = .25;
325 const double C4 = .5;
326 const double C5 = .62200846792814621559;
327 const double C6 = .78867513459481288225;
328
329 if (out.getFunctionSpace().getTypeCode() == Elements) {
330 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
331 #pragma omp parallel for
332 for (index_t k2=0; k2 < m_NE2; ++k2) {
333 for (index_t k1=0; k1 < m_NE1; ++k1) {
334 for (index_t k0=0; k0 < m_NE0; ++k0) {
335 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
336 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
337 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
338 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
339 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
340 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
341 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
342 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
343 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
344 for (index_t i=0; i < numComp; ++i) {
345 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
346 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
347 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
348 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
349 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
350 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
351 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
352 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
353 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
354 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
355 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
356 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
357 o[INDEX3(i,0,0,numComp,3)] = V0;
358 o[INDEX3(i,1,0,numComp,3)] = V4;
359 o[INDEX3(i,2,0,numComp,3)] = V8;
360 o[INDEX3(i,0,1,numComp,3)] = V0;
361 o[INDEX3(i,1,1,numComp,3)] = V5;
362 o[INDEX3(i,2,1,numComp,3)] = V9;
363 o[INDEX3(i,0,2,numComp,3)] = V1;
364 o[INDEX3(i,1,2,numComp,3)] = V4;
365 o[INDEX3(i,2,2,numComp,3)] = V10;
366 o[INDEX3(i,0,3,numComp,3)] = V1;
367 o[INDEX3(i,1,3,numComp,3)] = V5;
368 o[INDEX3(i,2,3,numComp,3)] = V11;
369 o[INDEX3(i,0,4,numComp,3)] = V2;
370 o[INDEX3(i,1,4,numComp,3)] = V6;
371 o[INDEX3(i,2,4,numComp,3)] = V8;
372 o[INDEX3(i,0,5,numComp,3)] = V2;
373 o[INDEX3(i,1,5,numComp,3)] = V7;
374 o[INDEX3(i,2,5,numComp,3)] = V9;
375 o[INDEX3(i,0,6,numComp,3)] = V3;
376 o[INDEX3(i,1,6,numComp,3)] = V6;
377 o[INDEX3(i,2,6,numComp,3)] = V10;
378 o[INDEX3(i,0,7,numComp,3)] = V3;
379 o[INDEX3(i,1,7,numComp,3)] = V7;
380 o[INDEX3(i,2,7,numComp,3)] = V11;
381 } /* end of component loop i */
382 } /* end of k0 loop */
383 } /* end of k1 loop */
384 } /* end of k2 loop */
385 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
386 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
387 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
388 #pragma omp parallel for
389 for (index_t k2=0; k2 < m_NE2; ++k2) {
390 for (index_t k1=0; k1 < m_NE1; ++k1) {
391 for (index_t k0=0; k0 < m_NE0; ++k0) {
392 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
393 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
394 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
395 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
396 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
397 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
398 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
399 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
400 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
401 for (index_t i=0; i < numComp; ++i) {
402 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
403 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
404 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
405 } /* end of component loop i */
406 } /* end of k0 loop */
407 } /* end of k1 loop */
408 } /* end of k2 loop */
409 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
410 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
411 /*** GENERATOR SNIP_GRAD_FACES TOP */
412 #pragma omp parallel
413 {
414 if (m_faceOffset[0] > -1) {
415 #pragma omp for nowait
416 for (index_t k2=0; k2 < m_NE2; ++k2) {
417 for (index_t k1=0; k1 < m_NE1; ++k1) {
418 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
419 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
420 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
421 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
422 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
423 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
424 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
425 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
426 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
427 for (index_t i=0; i < numComp; ++i) {
428 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
429 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
430 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
431 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
432 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
433 o[INDEX3(i,1,0,numComp,3)] = V0;
434 o[INDEX3(i,2,0,numComp,3)] = V2;
435 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
436 o[INDEX3(i,1,1,numComp,3)] = V0;
437 o[INDEX3(i,2,1,numComp,3)] = V3;
438 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
439 o[INDEX3(i,1,2,numComp,3)] = V1;
440 o[INDEX3(i,2,2,numComp,3)] = V2;
441 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
442 o[INDEX3(i,1,3,numComp,3)] = V1;
443 o[INDEX3(i,2,3,numComp,3)] = V3;
444 } /* end of component loop i */
445 } /* end of k1 loop */
446 } /* end of k2 loop */
447 } /* end of face 0 */
448 if (m_faceOffset[1] > -1) {
449 #pragma omp for nowait
450 for (index_t k2=0; k2 < m_NE2; ++k2) {
451 for (index_t k1=0; k1 < m_NE1; ++k1) {
452 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
453 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
454 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
455 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
456 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
457 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
458 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
459 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
460 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
461 for (index_t i=0; i < numComp; ++i) {
462 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
463 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
464 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
465 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
466 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
467 o[INDEX3(i,1,0,numComp,3)] = V0;
468 o[INDEX3(i,2,0,numComp,3)] = V2;
469 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
470 o[INDEX3(i,1,1,numComp,3)] = V0;
471 o[INDEX3(i,2,1,numComp,3)] = V3;
472 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
473 o[INDEX3(i,1,2,numComp,3)] = V1;
474 o[INDEX3(i,2,2,numComp,3)] = V2;
475 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
476 o[INDEX3(i,1,3,numComp,3)] = V1;
477 o[INDEX3(i,2,3,numComp,3)] = V3;
478 } /* end of component loop i */
479 } /* end of k1 loop */
480 } /* end of k2 loop */
481 } /* end of face 1 */
482 if (m_faceOffset[2] > -1) {
483 #pragma omp for nowait
484 for (index_t k2=0; k2 < m_NE2; ++k2) {
485 for (index_t k0=0; k0 < m_NE0; ++k0) {
486 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
487 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
488 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
489 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
490 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
491 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
492 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
493 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
494 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
495 for (index_t i=0; i < numComp; ++i) {
496 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
497 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
498 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
499 o[INDEX3(i,0,0,numComp,3)] = V0;
500 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
501 o[INDEX3(i,2,0,numComp,3)] = V1;
502 o[INDEX3(i,0,1,numComp,3)] = V0;
503 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
504 o[INDEX3(i,2,1,numComp,3)] = V2;
505 o[INDEX3(i,0,2,numComp,3)] = V0;
506 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
507 o[INDEX3(i,2,2,numComp,3)] = V1;
508 o[INDEX3(i,0,3,numComp,3)] = V0;
509 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
510 o[INDEX3(i,2,3,numComp,3)] = V2;
511 } /* end of component loop i */
512 } /* end of k0 loop */
513 } /* end of k2 loop */
514 } /* end of face 2 */
515 if (m_faceOffset[3] > -1) {
516 #pragma omp for nowait
517 for (index_t k2=0; k2 < m_NE2; ++k2) {
518 for (index_t k0=0; k0 < m_NE0; ++k0) {
519 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
520 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
521 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
522 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
523 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
524 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
525 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
526 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
527 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
528 for (index_t i=0; i < numComp; ++i) {
529 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
530 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
531 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
532 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
533 o[INDEX3(i,0,0,numComp,3)] = V0;
534 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
535 o[INDEX3(i,2,0,numComp,3)] = V2;
536 o[INDEX3(i,0,1,numComp,3)] = V0;
537 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
538 o[INDEX3(i,2,1,numComp,3)] = V3;
539 o[INDEX3(i,0,2,numComp,3)] = V1;
540 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
541 o[INDEX3(i,2,2,numComp,3)] = V2;
542 o[INDEX3(i,0,3,numComp,3)] = V1;
543 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
544 o[INDEX3(i,2,3,numComp,3)] = V3;
545 } /* end of component loop i */
546 } /* end of k0 loop */
547 } /* end of k2 loop */
548 } /* end of face 3 */
549 if (m_faceOffset[4] > -1) {
550 #pragma omp for nowait
551 for (index_t k1=0; k1 < m_NE1; ++k1) {
552 for (index_t k0=0; k0 < m_NE0; ++k0) {
553 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
554 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
555 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
556 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
557 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
558 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
559 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
560 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
561 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
562 for (index_t i=0; i < numComp; ++i) {
563 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
564 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
565 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
566 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
567 o[INDEX3(i,0,0,numComp,3)] = V0;
568 o[INDEX3(i,1,0,numComp,3)] = V2;
569 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
570 o[INDEX3(i,0,1,numComp,3)] = V0;
571 o[INDEX3(i,1,1,numComp,3)] = V3;
572 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
573 o[INDEX3(i,0,2,numComp,3)] = V1;
574 o[INDEX3(i,1,2,numComp,3)] = V2;
575 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
576 o[INDEX3(i,0,3,numComp,3)] = V1;
577 o[INDEX3(i,1,3,numComp,3)] = V3;
578 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
579 } /* end of component loop i */
580 } /* end of k0 loop */
581 } /* end of k1 loop */
582 } /* end of face 4 */
583 if (m_faceOffset[5] > -1) {
584 #pragma omp for nowait
585 for (index_t k1=0; k1 < m_NE1; ++k1) {
586 for (index_t k0=0; k0 < m_NE0; ++k0) {
587 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
588 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
589 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
590 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
591 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
592 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
593 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
594 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
595 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
596 for (index_t i=0; i < numComp; ++i) {
597 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
598 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
599 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
600 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
601 o[INDEX3(i,0,0,numComp,3)] = V0;
602 o[INDEX3(i,1,0,numComp,3)] = V2;
603 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
604 o[INDEX3(i,0,1,numComp,3)] = V0;
605 o[INDEX3(i,1,1,numComp,3)] = V3;
606 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
607 o[INDEX3(i,0,2,numComp,3)] = V1;
608 o[INDEX3(i,1,2,numComp,3)] = V2;
609 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
610 o[INDEX3(i,0,3,numComp,3)] = V1;
611 o[INDEX3(i,1,3,numComp,3)] = V3;
612 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
613 } /* end of component loop i */
614 } /* end of k0 loop */
615 } /* end of k1 loop */
616 } /* end of face 5 */
617 } // end of parallel section
618 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
619 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
620 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
621 #pragma omp parallel
622 {
623 if (m_faceOffset[0] > -1) {
624 #pragma omp for nowait
625 for (index_t k2=0; k2 < m_NE2; ++k2) {
626 for (index_t k1=0; k1 < m_NE1; ++k1) {
627 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
628 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
629 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
630 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
631 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
632 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
633 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
634 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
635 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
636 for (index_t i=0; i < numComp; ++i) {
637 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
638 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
639 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
640 } /* end of component loop i */
641 } /* end of k1 loop */
642 } /* end of k2 loop */
643 } /* end of face 0 */
644 if (m_faceOffset[1] > -1) {
645 #pragma omp for nowait
646 for (index_t k2=0; k2 < m_NE2; ++k2) {
647 for (index_t k1=0; k1 < m_NE1; ++k1) {
648 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
649 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
650 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
651 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
652 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
653 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
654 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
655 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
656 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
657 for (index_t i=0; i < numComp; ++i) {
658 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
659 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
660 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
661 } /* end of component loop i */
662 } /* end of k1 loop */
663 } /* end of k2 loop */
664 } /* end of face 1 */
665 if (m_faceOffset[2] > -1) {
666 #pragma omp for nowait
667 for (index_t k2=0; k2 < m_NE2; ++k2) {
668 for (index_t k0=0; k0 < m_NE0; ++k0) {
669 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
670 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
671 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
672 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
673 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
674 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
675 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
676 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
677 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
678 for (index_t i=0; i < numComp; ++i) {
679 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
680 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
681 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
682 } /* end of component loop i */
683 } /* end of k0 loop */
684 } /* end of k2 loop */
685 } /* end of face 2 */
686 if (m_faceOffset[3] > -1) {
687 #pragma omp for nowait
688 for (index_t k2=0; k2 < m_NE2; ++k2) {
689 for (index_t k0=0; k0 < m_NE0; ++k0) {
690 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
691 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
692 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
693 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
694 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
695 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
696 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
697 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
698 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
699 for (index_t i=0; i < numComp; ++i) {
700 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
701 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
702 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
703 } /* end of component loop i */
704 } /* end of k0 loop */
705 } /* end of k2 loop */
706 } /* end of face 3 */
707 if (m_faceOffset[4] > -1) {
708 #pragma omp for nowait
709 for (index_t k1=0; k1 < m_NE1; ++k1) {
710 for (index_t k0=0; k0 < m_NE0; ++k0) {
711 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
712 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
713 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
714 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
715 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
716 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
717 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
718 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
719 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
720 for (index_t i=0; i < numComp; ++i) {
721 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
722 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
723 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
724 } /* end of component loop i */
725 } /* end of k0 loop */
726 } /* end of k1 loop */
727 } /* end of face 4 */
728 if (m_faceOffset[5] > -1) {
729 #pragma omp for nowait
730 for (index_t k1=0; k1 < m_NE1; ++k1) {
731 for (index_t k0=0; k0 < m_NE0; ++k0) {
732 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
733 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
734 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
735 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
736 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
737 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
738 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
739 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
740 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
741 for (index_t i=0; i < numComp; ++i) {
742 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
743 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
744 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
745 } /* end of component loop i */
746 } /* end of k0 loop */
747 } /* end of k1 loop */
748 } /* end of face 5 */
749 } // end of parallel section
750 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
751 } else {
752 stringstream msg;
753 msg << "setToGradient() not implemented for "
754 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
755 throw RipleyException(msg.str());
756 }
757 }
758
759 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
760 {
761 escript::Data& in = *const_cast<escript::Data*>(&arg);
762 const dim_t numComp = in.getDataPointSize();
763 const double h0 = m_l0/m_gNE0;
764 const double h1 = m_l1/m_gNE1;
765 const double h2 = m_l2/m_gNE2;
766 if (arg.getFunctionSpace().getTypeCode() == Elements) {
767 const double w_0 = h0*h1*h2/8.;
768 #pragma omp parallel
769 {
770 vector<double> int_local(numComp, 0);
771 #pragma omp for nowait
772 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
773 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
774 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
775 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
776 for (index_t i=0; i < numComp; ++i) {
777 const register double f_0 = f[INDEX2(i,0,numComp)];
778 const register double f_1 = f[INDEX2(i,1,numComp)];
779 const register double f_2 = f[INDEX2(i,2,numComp)];
780 const register double f_3 = f[INDEX2(i,3,numComp)];
781 const register double f_4 = f[INDEX2(i,4,numComp)];
782 const register double f_5 = f[INDEX2(i,5,numComp)];
783 const register double f_6 = f[INDEX2(i,6,numComp)];
784 const register double f_7 = f[INDEX2(i,7,numComp)];
785 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
786 } /* end of component loop i */
787 } /* end of k0 loop */
788 } /* end of k1 loop */
789 } /* end of k2 loop */
790
791 #pragma omp critical
792 for (index_t i=0; i<numComp; i++)
793 integrals[i]+=int_local[i];
794 } // end of parallel section
795 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
796 const double w_0 = h0*h1*h2;
797 #pragma omp parallel
798 {
799 vector<double> int_local(numComp, 0);
800 #pragma omp for nowait
801 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
802 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
803 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
804 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
805 for (index_t i=0; i < numComp; ++i) {
806 int_local[i]+=f[i]*w_0;
807 } /* end of component loop i */
808 } /* end of k0 loop */
809 } /* end of k1 loop */
810 } /* end of k2 loop */
811
812 #pragma omp critical
813 for (index_t i=0; i<numComp; i++)
814 integrals[i]+=int_local[i];
815 } // end of parallel section
816 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
817 const double w_0 = h1*h2/4.;
818 const double w_1 = h0*h2/4.;
819 const double w_2 = h0*h1/4.;
820 #pragma omp parallel
821 {
822 vector<double> int_local(numComp, 0);
823 if (m_faceOffset[0] > -1) {
824 #pragma omp for nowait
825 for (index_t k2=0; k2 < m_NE2; ++k2) {
826 for (index_t k1=0; k1 < m_NE1; ++k1) {
827 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
828 for (index_t i=0; i < numComp; ++i) {
829 const register double f_0 = f[INDEX2(i,0,numComp)];
830 const register double f_1 = f[INDEX2(i,1,numComp)];
831 const register double f_2 = f[INDEX2(i,2,numComp)];
832 const register double f_3 = f[INDEX2(i,3,numComp)];
833 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
834 } /* end of component loop i */
835 } /* end of k1 loop */
836 } /* end of k2 loop */
837 }
838
839 if (m_faceOffset[1] > -1) {
840 #pragma omp for nowait
841 for (index_t k2=0; k2 < m_NE2; ++k2) {
842 for (index_t k1=0; k1 < m_NE1; ++k1) {
843 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
844 for (index_t i=0; i < numComp; ++i) {
845 const register double f_0 = f[INDEX2(i,0,numComp)];
846 const register double f_1 = f[INDEX2(i,1,numComp)];
847 const register double f_2 = f[INDEX2(i,2,numComp)];
848 const register double f_3 = f[INDEX2(i,3,numComp)];
849 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
850 } /* end of component loop i */
851 } /* end of k1 loop */
852 } /* end of k2 loop */
853 }
854
855 if (m_faceOffset[2] > -1) {
856 #pragma omp for nowait
857 for (index_t k2=0; k2 < m_NE2; ++k2) {
858 for (index_t k0=0; k0 < m_NE0; ++k0) {
859 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
860 for (index_t i=0; i < numComp; ++i) {
861 const register double f_0 = f[INDEX2(i,0,numComp)];
862 const register double f_1 = f[INDEX2(i,1,numComp)];
863 const register double f_2 = f[INDEX2(i,2,numComp)];
864 const register double f_3 = f[INDEX2(i,3,numComp)];
865 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
866 } /* end of component loop i */
867 } /* end of k1 loop */
868 } /* end of k2 loop */
869 }
870
871 if (m_faceOffset[3] > -1) {
872 #pragma omp for nowait
873 for (index_t k2=0; k2 < m_NE2; ++k2) {
874 for (index_t k0=0; k0 < m_NE0; ++k0) {
875 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
876 for (index_t i=0; i < numComp; ++i) {
877 const register double f_0 = f[INDEX2(i,0,numComp)];
878 const register double f_1 = f[INDEX2(i,1,numComp)];
879 const register double f_2 = f[INDEX2(i,2,numComp)];
880 const register double f_3 = f[INDEX2(i,3,numComp)];
881 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
882 } /* end of component loop i */
883 } /* end of k1 loop */
884 } /* end of k2 loop */
885 }
886
887 if (m_faceOffset[4] > -1) {
888 #pragma omp for nowait
889 for (index_t k1=0; k1 < m_NE1; ++k1) {
890 for (index_t k0=0; k0 < m_NE0; ++k0) {
891 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
892 for (index_t i=0; i < numComp; ++i) {
893 const register double f_0 = f[INDEX2(i,0,numComp)];
894 const register double f_1 = f[INDEX2(i,1,numComp)];
895 const register double f_2 = f[INDEX2(i,2,numComp)];
896 const register double f_3 = f[INDEX2(i,3,numComp)];
897 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
898 } /* end of component loop i */
899 } /* end of k1 loop */
900 } /* end of k2 loop */
901 }
902
903 if (m_faceOffset[5] > -1) {
904 #pragma omp for nowait
905 for (index_t k1=0; k1 < m_NE1; ++k1) {
906 for (index_t k0=0; k0 < m_NE0; ++k0) {
907 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
908 for (index_t i=0; i < numComp; ++i) {
909 const register double f_0 = f[INDEX2(i,0,numComp)];
910 const register double f_1 = f[INDEX2(i,1,numComp)];
911 const register double f_2 = f[INDEX2(i,2,numComp)];
912 const register double f_3 = f[INDEX2(i,3,numComp)];
913 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
914 } /* end of component loop i */
915 } /* end of k1 loop */
916 } /* end of k2 loop */
917 }
918
919 #pragma omp critical
920 for (index_t i=0; i<numComp; i++)
921 integrals[i]+=int_local[i];
922 } // end of parallel section
923
924 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
925 const double w_0 = h1*h2;
926 const double w_1 = h0*h2;
927 const double w_2 = h0*h1;
928 #pragma omp parallel
929 {
930 vector<double> int_local(numComp, 0);
931 if (m_faceOffset[0] > -1) {
932 #pragma omp for nowait
933 for (index_t k2=0; k2 < m_NE2; ++k2) {
934 for (index_t k1=0; k1 < m_NE1; ++k1) {
935 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
936 for (index_t i=0; i < numComp; ++i) {
937 int_local[i]+=f[i]*w_0;
938 } /* end of component loop i */
939 } /* end of k1 loop */
940 } /* end of k2 loop */
941 }
942
943 if (m_faceOffset[1] > -1) {
944 #pragma omp for nowait
945 for (index_t k2=0; k2 < m_NE2; ++k2) {
946 for (index_t k1=0; k1 < m_NE1; ++k1) {
947 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
948 for (index_t i=0; i < numComp; ++i) {
949 int_local[i]+=f[i]*w_0;
950 } /* end of component loop i */
951 } /* end of k1 loop */
952 } /* end of k2 loop */
953 }
954
955 if (m_faceOffset[2] > -1) {
956 #pragma omp for nowait
957 for (index_t k2=0; k2 < m_NE2; ++k2) {
958 for (index_t k0=0; k0 < m_NE0; ++k0) {
959 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
960 for (index_t i=0; i < numComp; ++i) {
961 int_local[i]+=f[i]*w_1;
962 } /* end of component loop i */
963 } /* end of k1 loop */
964 } /* end of k2 loop */
965 }
966
967 if (m_faceOffset[3] > -1) {
968 #pragma omp for nowait
969 for (index_t k2=0; k2 < m_NE2; ++k2) {
970 for (index_t k0=0; k0 < m_NE0; ++k0) {
971 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
972 for (index_t i=0; i < numComp; ++i) {
973 int_local[i]+=f[i]*w_1;
974 } /* end of component loop i */
975 } /* end of k1 loop */
976 } /* end of k2 loop */
977 }
978
979 if (m_faceOffset[4] > -1) {
980 #pragma omp for nowait
981 for (index_t k1=0; k1 < m_NE1; ++k1) {
982 for (index_t k0=0; k0 < m_NE0; ++k0) {
983 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
984 for (index_t i=0; i < numComp; ++i) {
985 int_local[i]+=f[i]*w_2;
986 } /* end of component loop i */
987 } /* end of k1 loop */
988 } /* end of k2 loop */
989 }
990
991 if (m_faceOffset[5] > -1) {
992 #pragma omp for nowait
993 for (index_t k1=0; k1 < m_NE1; ++k1) {
994 for (index_t k0=0; k0 < m_NE0; ++k0) {
995 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
996 for (index_t i=0; i < numComp; ++i) {
997 int_local[i]+=f[i]*w_2;
998 } /* end of component loop i */
999 } /* end of k1 loop */
1000 } /* end of k2 loop */
1001 }
1002
1003 #pragma omp critical
1004 for (index_t i=0; i<numComp; i++)
1005 integrals[i]+=int_local[i];
1006 } // end of parallel section
1007
1008 } else {
1009 stringstream msg;
1010 msg << "setToIntegrals() not implemented for "
1011 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1012 throw RipleyException(msg.str());
1013 }
1014 }
1015
1016 void Brick::setToNormal(escript::Data& out) const
1017 {
1018 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1019 #pragma omp parallel
1020 {
1021 if (m_faceOffset[0] > -1) {
1022 #pragma omp for nowait
1023 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1024 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1025 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1026 // set vector at four quadrature points
1027 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1028 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1029 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1030 *o++ = -1.; *o++ = 0.; *o = 0.;
1031 }
1032 }
1033 }
1034
1035 if (m_faceOffset[1] > -1) {
1036 #pragma omp for nowait
1037 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1038 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1039 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1040 // set vector at four quadrature points
1041 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1042 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1043 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1044 *o++ = 1.; *o++ = 0.; *o = 0.;
1045 }
1046 }
1047 }
1048
1049 if (m_faceOffset[2] > -1) {
1050 #pragma omp for nowait
1051 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1052 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1053 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1054 // set vector at four quadrature points
1055 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1056 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1057 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1058 *o++ = 0.; *o++ = -1.; *o = 0.;
1059 }
1060 }
1061 }
1062
1063 if (m_faceOffset[3] > -1) {
1064 #pragma omp for nowait
1065 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1066 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1067 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1068 // set vector at four quadrature points
1069 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1070 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1071 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1072 *o++ = 0.; *o++ = 1.; *o = 0.;
1073 }
1074 }
1075 }
1076
1077 if (m_faceOffset[4] > -1) {
1078 #pragma omp for nowait
1079 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1080 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1081 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1082 // set vector at four quadrature points
1083 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1084 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1085 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1086 *o++ = 0.; *o++ = 0.; *o = -1.;
1087 }
1088 }
1089 }
1090
1091 if (m_faceOffset[5] > -1) {
1092 #pragma omp for nowait
1093 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1094 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1095 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1096 // set vector at four quadrature points
1097 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1098 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1099 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1100 *o++ = 0.; *o++ = 0.; *o = 1.;
1101 }
1102 }
1103 }
1104 } // end of parallel section
1105 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1106 #pragma omp parallel
1107 {
1108 if (m_faceOffset[0] > -1) {
1109 #pragma omp for nowait
1110 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1111 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1112 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1113 *o++ = -1.;
1114 *o++ = 0.;
1115 *o = 0.;
1116 }
1117 }
1118 }
1119
1120 if (m_faceOffset[1] > -1) {
1121 #pragma omp for nowait
1122 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1123 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1124 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1125 *o++ = 1.;
1126 *o++ = 0.;
1127 *o = 0.;
1128 }
1129 }
1130 }
1131
1132 if (m_faceOffset[2] > -1) {
1133 #pragma omp for nowait
1134 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1135 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1136 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1137 *o++ = 0.;
1138 *o++ = -1.;
1139 *o = 0.;
1140 }
1141 }
1142 }
1143
1144 if (m_faceOffset[3] > -1) {
1145 #pragma omp for nowait
1146 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1147 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1148 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1149 *o++ = 0.;
1150 *o++ = 1.;
1151 *o = 0.;
1152 }
1153 }
1154 }
1155
1156 if (m_faceOffset[4] > -1) {
1157 #pragma omp for nowait
1158 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1159 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1160 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1161 *o++ = 0.;
1162 *o++ = 0.;
1163 *o = -1.;
1164 }
1165 }
1166 }
1167
1168 if (m_faceOffset[5] > -1) {
1169 #pragma omp for nowait
1170 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1171 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1172 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1173 *o++ = 0.;
1174 *o++ = 0.;
1175 *o = 1.;
1176 }
1177 }
1178 }
1179 } // end of parallel section
1180
1181 } else {
1182 stringstream msg;
1183 msg << "setToNormal() not implemented for "
1184 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1185 throw RipleyException(msg.str());
1186 }
1187 }
1188
1189 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1190 bool reducedColOrder) const
1191 {
1192 if (reducedRowOrder || reducedColOrder)
1193 throw RipleyException("getPattern() not implemented for reduced order");
1194
1195 return m_pattern;
1196 }
1197
1198 void Brick::Print_Mesh_Info(const bool full) const
1199 {
1200 RipleyDomain::Print_Mesh_Info(full);
1201 if (full) {
1202 cout << " Id Coordinates" << endl;
1203 cout.precision(15);
1204 cout.setf(ios::scientific, ios::floatfield);
1205 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1206 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1207 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1208 for (index_t i=0; i < getNumNodes(); i++) {
1209 cout << " " << setw(5) << m_nodeId[i]
1210 << " " << xdx.first+(i%m_N0)*xdx.second
1211 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1212 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1213 }
1214 }
1215 }
1216
1217 IndexVector Brick::getNumNodesPerDim() const
1218 {
1219 IndexVector ret;
1220 ret.push_back(m_N0);
1221 ret.push_back(m_N1);
1222 ret.push_back(m_N2);
1223 return ret;
1224 }
1225
1226 IndexVector Brick::getNumElementsPerDim() const
1227 {
1228 IndexVector ret;
1229 ret.push_back(m_NE0);
1230 ret.push_back(m_NE1);
1231 ret.push_back(m_NE2);
1232 return ret;
1233 }
1234
1235 IndexVector Brick::getNumFacesPerBoundary() const
1236 {
1237 IndexVector ret(6, 0);
1238 //left
1239 if (m_offset0==0)
1240 ret[0]=m_NE1*m_NE2;
1241 //right
1242 if (m_mpiInfo->rank%m_NX==m_NX-1)
1243 ret[1]=m_NE1*m_NE2;
1244 //bottom
1245 if (m_offset1==0)
1246 ret[2]=m_NE0*m_NE2;
1247 //top
1248 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1249 ret[3]=m_NE0*m_NE2;
1250 //front
1251 if (m_offset2==0)
1252 ret[4]=m_NE0*m_NE1;
1253 //back
1254 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1255 ret[5]=m_NE0*m_NE1;
1256 return ret;
1257 }
1258
1259 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1260 {
1261 if (dim==0)
1262 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1263 else if (dim==1)
1264 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1265 else if (dim==2)
1266 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1267
1268 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1269 }
1270
1271 //protected
1272 dim_t Brick::getNumDOF() const
1273 {
1274 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1275 }
1276
1277 //protected
1278 dim_t Brick::getNumFaceElements() const
1279 {
1280 const IndexVector faces = getNumFacesPerBoundary();
1281 dim_t n=0;
1282 for (size_t i=0; i<faces.size(); i++)
1283 n+=faces[i];
1284 return n;
1285 }
1286
1287 //protected
1288 void Brick::assembleCoordinates(escript::Data& arg) const
1289 {
1290 escriptDataC x = arg.getDataC();
1291 int numDim = m_numDim;
1292 if (!isDataPointShapeEqual(&x, 1, &numDim))
1293 throw RipleyException("setToX: Invalid Data object shape");
1294 if (!numSamplesEqual(&x, 1, getNumNodes()))
1295 throw RipleyException("setToX: Illegal number of samples in Data object");
1296
1297 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1298 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1299 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1300 arg.requireWrite();
1301 #pragma omp parallel for
1302 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1303 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1304 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1305 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1306 point[0] = xdx.first+i0*xdx.second;
1307 point[1] = ydy.first+i1*ydy.second;
1308 point[2] = zdz.first+i2*zdz.second;
1309 }
1310 }
1311 }
1312 }
1313
1314 //protected
1315 dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1316 {
1317 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1318 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1319 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1320 const int x=node%nDOF0;
1321 const int y=node%(nDOF0*nDOF1)/nDOF0;
1322 const int z=node/(nDOF0*nDOF1);
1323 int num=0;
1324 // loop through potential neighbours and add to index if positions are
1325 // within bounds
1326 for (int i2=-1; i2<2; i2++) {
1327 for (int i1=-1; i1<2; i1++) {
1328 for (int i0=-1; i0<2; i0++) {
1329 // skip node itself
1330 if (i0==0 && i1==0 && i2==0)
1331 continue;
1332 // location of neighbour node
1333 const int nx=x+i0;
1334 const int ny=y+i1;
1335 const int nz=z+i2;
1336 if (nx>=0 && ny>=0 && nz>=0
1337 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1338 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1339 num++;
1340 }
1341 }
1342 }
1343 }
1344
1345 return num;
1346 }
1347
1348 //protected
1349 void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1350 {
1351 const dim_t numComp = in.getDataPointSize();
1352 out.requireWrite();
1353
1354 const index_t left = (m_offset0==0 ? 0 : 1);
1355 const index_t bottom = (m_offset1==0 ? 0 : 1);
1356 const index_t front = (m_offset2==0 ? 0 : 1);
1357 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1358 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1359 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1360 #pragma omp parallel for
1361 for (index_t i=0; i<nDOF2; i++) {
1362 for (index_t j=0; j<nDOF1; j++) {
1363 for (index_t k=0; k<nDOF0; k++) {
1364 const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1365 const double* src=in.getSampleDataRO(n);
1366 copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1367 }
1368 }
1369 }
1370 }
1371
1372 //protected
1373 void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1374 {
1375 const dim_t numComp = in.getDataPointSize();
1376 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1377 in.requireWrite();
1378 Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1379
1380 const dim_t numDOF = getNumDOF();
1381 out.requireWrite();
1382 const double* buffer = Paso_Coupler_finishCollect(coupler);
1383
1384 #pragma omp parallel for
1385 for (index_t i=0; i<getNumNodes(); i++) {
1386 const double* src=(m_dofMap[i]<numDOF ?
1387 in.getSampleDataRO(m_dofMap[i])
1388 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1389 copy(src, src+numComp, out.getSampleDataRW(i));
1390 }
1391 }
1392
1393 //private
1394 void Brick::populateSampleIds()
1395 {
1396 // identifiers are ordered from left to right, bottom to top, front to back
1397 // globally
1398
1399 // build node distribution vector first.
1400 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1401 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1402 const dim_t numDOF=getNumDOF();
1403 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1404 m_nodeDistribution[k]=k*numDOF;
1405 }
1406 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1407 m_nodeId.resize(getNumNodes());
1408 m_dofId.resize(numDOF);
1409 m_elementId.resize(getNumElements());
1410 m_faceId.resize(getNumFaceElements());
1411
1412 #pragma omp parallel
1413 {
1414 #pragma omp for nowait
1415 // nodes
1416 for (dim_t i2=0; i2<m_N2; i2++) {
1417 for (dim_t i1=0; i1<m_N1; i1++) {
1418 for (dim_t i0=0; i0<m_N0; i0++) {
1419 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1420 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1421 +(m_offset1+i1)*(m_gNE0+1)
1422 +m_offset0+i0;
1423 }
1424 }
1425 }
1426
1427 // degrees of freedom
1428 #pragma omp for nowait
1429 for (dim_t k=0; k<numDOF; k++)
1430 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1431
1432 // elements
1433 #pragma omp for nowait
1434 for (dim_t i2=0; i2<m_NE2; i2++) {
1435 for (dim_t i1=0; i1<m_NE1; i1++) {
1436 for (dim_t i0=0; i0<m_NE0; i0++) {
1437 m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1438 (m_offset2+i2)*m_gNE0*m_gNE1
1439 +(m_offset1+i1)*m_gNE0
1440 +m_offset0+i0;
1441 }
1442 }
1443 }
1444
1445 // face elements
1446 #pragma omp for
1447 for (dim_t k=0; k<getNumFaceElements(); k++)
1448 m_faceId[k]=k;
1449 } // end parallel section
1450
1451 m_nodeTags.assign(getNumNodes(), 0);
1452 updateTagsInUse(Nodes);
1453
1454 m_elementTags.assign(getNumElements(), 0);
1455 updateTagsInUse(Elements);
1456
1457 // generate face offset vector and set face tags
1458 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1459 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1460 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1461 m_faceOffset.assign(facesPerEdge.size(), -1);
1462 m_faceTags.clear();
1463 index_t offset=0;
1464 for (size_t i=0; i<facesPerEdge.size(); i++) {
1465 if (facesPerEdge[i]>0) {
1466 m_faceOffset[i]=offset;
1467 offset+=facesPerEdge[i];
1468 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1469 }
1470 }
1471 setTagMap("left", LEFT);
1472 setTagMap("right", RIGHT);
1473 setTagMap("bottom", BOTTOM);
1474 setTagMap("top", TOP);
1475 setTagMap("front", FRONT);
1476 setTagMap("back", BACK);
1477 updateTagsInUse(FaceElements);
1478 }
1479
1480 //private
1481 void Brick::createPattern()
1482 {
1483 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1484 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1485 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1486 const index_t left = (m_offset0==0 ? 0 : 1);
1487 const index_t bottom = (m_offset1==0 ? 0 : 1);
1488 const index_t front = (m_offset2==0 ? 0 : 1);
1489
1490 // populate node->DOF mapping with own degrees of freedom.
1491 // The rest is assigned in the loop further down
1492 m_dofMap.assign(getNumNodes(), 0);
1493 #pragma omp parallel for
1494 for (index_t i=front; i<m_N2; i++) {
1495 for (index_t j=bottom; j<m_N1; j++) {
1496 for (index_t k=left; k<m_N0; k++) {
1497 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1498 }
1499 }
1500 }
1501
1502 // build list of shared components and neighbours by looping through
1503 // all potential neighbouring ranks and checking if positions are
1504 // within bounds
1505 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1506 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1507 RankVector neighbour;
1508 IndexVector offsetInShared(1,0);
1509 IndexVector sendShared, recvShared;
1510 int numShared=0;
1511 const int x=m_mpiInfo->rank%m_NX;
1512 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1513 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1514 for (int i2=-1; i2<2; i2++) {
1515 for (int i1=-1; i1<2; i1++) {
1516 for (int i0=-1; i0<2; i0++) {
1517 // skip this rank
1518 if (i0==0 && i1==0 && i2==0)
1519 continue;
1520 // location of neighbour rank
1521 const int nx=x+i0;
1522 const int ny=y+i1;
1523 const int nz=z+i2;
1524 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1525 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1526 if (i0==0 && i1==0) {
1527 // sharing front or back plane
1528 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1529 for (dim_t i=0; i<nDOF1; i++) {
1530 const int firstDOF=(i2==-1 ? i*nDOF0
1531 : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1532 const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1533 : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1534 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1535 sendShared.push_back(firstDOF+j);
1536 recvShared.push_back(numDOF+numShared);
1537 if (j>0) {
1538 if (i>0)
1539 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1540 colIndices[firstDOF+j-1].push_back(numShared);
1541 if (i<nDOF1-1)
1542 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1543 }
1544 if (i>0)
1545 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1546 colIndices[firstDOF+j].push_back(numShared);
1547 if (i<nDOF1-1)
1548 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1549 if (j<nDOF0-1) {
1550 if (i>0)
1551 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1552 colIndices[firstDOF+j+1].push_back(numShared);
1553 if (i<nDOF1-1)
1554 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1555 }
1556 m_dofMap[firstNode+j]=numDOF+numShared;
1557 }
1558 }
1559 } else if (i0==0 && i2==0) {
1560 // sharing top or bottom plane
1561 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1562 for (dim_t i=0; i<nDOF2; i++) {
1563 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1564 : nDOF0*((i+1)*nDOF1-1));
1565 const int firstNode=(i1==-1 ?
1566 left+(i+front)*m_N0*m_N1
1567 : left+m_N0*((i+1+front)*m_N1-1));
1568 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1569 sendShared.push_back(firstDOF+j);
1570 recvShared.push_back(numDOF+numShared);
1571 if (j>0) {
1572 if (i>0)
1573 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1574 colIndices[firstDOF+j-1].push_back(numShared);
1575 if (i<nDOF2-1)
1576 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1577 }
1578 if (i>0)
1579 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1580 colIndices[firstDOF+j].push_back(numShared);
1581 if (i<nDOF2-1)
1582 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1583 if (j<nDOF0-1) {
1584 if (i>0)
1585 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1586 colIndices[firstDOF+j+1].push_back(numShared);
1587 if (i<nDOF2-1)
1588 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1589 }
1590 m_dofMap[firstNode+j]=numDOF+numShared;
1591 }
1592 }
1593 } else if (i1==0 && i2==0) {
1594 // sharing left or right plane
1595 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1596 for (dim_t i=0; i<nDOF2; i++) {
1597 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1598 : nDOF0*(1+i*nDOF1)-1);
1599 const int firstNode=(i0==-1 ?
1600 (bottom+(i+front)*m_N1)*m_N0
1601 : (bottom+1+(i+front)*m_N1)*m_N0-1);
1602 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1603 sendShared.push_back(firstDOF+j*nDOF0);
1604 recvShared.push_back(numDOF+numShared);
1605 if (j>0) {
1606 if (i>0)
1607 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1608 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1609 if (i<nDOF2-1)
1610 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1611 }
1612 if (i>0)
1613 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1614 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1615 if (i<nDOF2-1)
1616 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1617 if (j<nDOF1-1) {
1618 if (i>0)
1619 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1620 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1621 if (i<nDOF2-1)
1622 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1623 }
1624 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1625 }
1626 }
1627 } else if (i0==0) {
1628 // sharing an edge in x direction
1629 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1630 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1631 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1632 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1633 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1634 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1635 sendShared.push_back(firstDOF+i);
1636 recvShared.push_back(numDOF+numShared);
1637 if (i>0)
1638 colIndices[firstDOF+i-1].push_back(numShared);
1639 colIndices[firstDOF+i].push_back(numShared);
1640 if (i<nDOF0-1)
1641 colIndices[firstDOF+i+1].push_back(numShared);
1642 m_dofMap[firstNode+i]=numDOF+numShared;
1643 }
1644 } else if (i1==0) {
1645 // sharing an edge in y direction
1646 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1647 const int firstDOF=(i0+1)/2*(nDOF0-1)
1648 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1649 const int firstNode=(i0+1)/2*(m_N0-1)
1650 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1651 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1652 sendShared.push_back(firstDOF+i*nDOF0);
1653 recvShared.push_back(numDOF+numShared);
1654 if (i>0)
1655 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1656 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1657 if (i<nDOF1-1)
1658 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1659 m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1660 }
1661 } else if (i2==0) {
1662 // sharing an edge in z direction
1663 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1664 const int firstDOF=(i0+1)/2*(nDOF0-1)
1665 +(i1+1)/2*nDOF0*(nDOF1-1);
1666 const int firstNode=(i0+1)/2*(m_N0-1)
1667 +(i1+1)/2*m_N0*(m_N1-1);
1668 for (dim_t i=0; i<nDOF2; i++, numShared++) {
1669 sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
1670 recvShared.push_back(numDOF+numShared);
1671 if (i>0)
1672 colIndices[firstDOF+(i-1)*nDOF0*nDOF1].push_back(numShared);
1673 colIndices[firstDOF+i*nDOF0*nDOF1].push_back(numShared);
1674 if (i<nDOF2-1)
1675 colIndices[firstDOF+(i+1)*nDOF0*nDOF1].push_back(numShared);
1676 m_dofMap[firstNode+i*m_N0*m_N1]=numDOF+numShared;
1677 }
1678 } else {
1679 // sharing a node
1680 const int dof=(i0+1)/2*(nDOF0-1)
1681 +(i1+1)/2*nDOF0*(nDOF1-1)
1682 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1683 const int node=(i0+1)/2*(m_N0-1)
1684 +(i1+1)/2*m_N0*(m_N1-1)
1685 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1686 offsetInShared.push_back(offsetInShared.back()+1);
1687 sendShared.push_back(dof);
1688 recvShared.push_back(numDOF+numShared);
1689 colIndices[dof].push_back(numShared);
1690 m_dofMap[node]=numDOF+numShared;
1691 ++numShared;
1692 }
1693 }
1694 }
1695 }
1696 }
1697
1698 // create connector
1699 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1700 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1701 &offsetInShared[0], 1, 0, m_mpiInfo);
1702 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1703 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1704 &offsetInShared[0], 1, 0, m_mpiInfo);
1705 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1706 Paso_SharedComponents_free(snd_shcomp);
1707 Paso_SharedComponents_free(rcv_shcomp);
1708
1709 // create main and couple blocks
1710 Paso_Pattern *mainPattern = createMainPattern();
1711 Paso_Pattern *colPattern, *rowPattern;
1712 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1713
1714 // allocate paso distribution
1715 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1716 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1717
1718 // finally create the system matrix
1719 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1720 distribution, distribution, mainPattern, colPattern, rowPattern,
1721 m_connector, m_connector);
1722
1723 Paso_Distribution_free(distribution);
1724
1725 // useful debug output
1726 /*
1727 cout << "--- rcv_shcomp ---" << endl;
1728 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1729 for (size_t i=0; i<neighbour.size(); i++) {
1730 cout << "neighbor[" << i << "]=" << neighbour[i]
1731 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1732 }
1733 for (size_t i=0; i<recvShared.size(); i++) {
1734 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1735 }
1736 cout << "--- snd_shcomp ---" << endl;
1737 for (size_t i=0; i<sendShared.size(); i++) {
1738 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1739 }
1740 cout << "--- dofMap ---" << endl;
1741 for (size_t i=0; i<m_dofMap.size(); i++) {
1742 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1743 }
1744 cout << "--- colIndices ---" << endl;
1745 for (size_t i=0; i<colIndices.size(); i++) {
1746 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1747 }
1748 */
1749
1750 /*
1751 cout << "--- main_pattern ---" << endl;
1752 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1753 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1754 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1755 }
1756 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1757 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1758 }
1759 */
1760
1761 /*
1762 cout << "--- colCouple_pattern ---" << endl;
1763 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1764 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1765 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1766 }
1767 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1768 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1769 }
1770 */
1771
1772 /*
1773 cout << "--- rowCouple_pattern ---" << endl;
1774 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1775 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1776 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1777 }
1778 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1779 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1780 }
1781 */
1782
1783 Paso_Pattern_free(mainPattern);
1784 Paso_Pattern_free(colPattern);
1785 Paso_Pattern_free(rowPattern);
1786 }
1787
1788 //protected
1789 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1790 bool reduced) const
1791 {
1792 const dim_t numComp = in.getDataPointSize();
1793 if (reduced) {
1794 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1795 const double c0 = .125;
1796 #pragma omp parallel for
1797 for (index_t k2=0; k2 < m_NE2; ++k2) {
1798 for (index_t k1=0; k1 < m_NE1; ++k1) {
1799 for (index_t k0=0; k0 < m_NE0; ++k0) {
1800 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1801 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1802 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1803 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1804 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1805 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1806 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1807 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1808 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1809 for (index_t i=0; i < numComp; ++i) {
1810 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1811 } /* end of component loop i */
1812 } /* end of k0 loop */
1813 } /* end of k1 loop */
1814 } /* end of k2 loop */
1815 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1816 } else {
1817 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1818 const double c0 = .0094373878376559314545;
1819 const double c1 = .035220810900864519624;
1820 const double c2 = .13144585576580214704;
1821 const double c3 = .49056261216234406855;
1822 #pragma omp parallel for
1823 for (index_t k2=0; k2 < m_NE2; ++k2) {
1824 for (index_t k1=0; k1 < m_NE1; ++k1) {
1825 for (index_t k0=0; k0 < m_NE0; ++k0) {
1826 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1827 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1828 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1829 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1830 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1831 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1832 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1833 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1834 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1835 for (index_t i=0; i < numComp; ++i) {
1836 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1837 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1838 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1839 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1840 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1841 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1842 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1843 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1844 } /* end of component loop i */
1845 } /* end of k0 loop */
1846 } /* end of k1 loop */
1847 } /* end of k2 loop */
1848 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1849 }
1850 }
1851
1852 //protected
1853 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1854 bool reduced) const
1855 {
1856 const dim_t numComp = in.getDataPointSize();
1857 if (reduced) {
1858 const double c0 = .25;
1859 #pragma omp parallel
1860 {
1861 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1862 if (m_faceOffset[0] > -1) {
1863 #pragma omp for nowait
1864 for (index_t k2=0; k2 < m_NE2; ++k2) {
1865 for (index_t k1=0; k1 < m_NE1; ++k1) {
1866 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1867 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1868 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1869 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1870 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1871 for (index_t i=0; i < numComp; ++i) {
1872 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1873 } /* end of component loop i */
1874 } /* end of k1 loop */
1875 } /* end of k2 loop */
1876 } /* end of face 0 */
1877 if (m_faceOffset[1] > -1) {
1878 #pragma omp for nowait
1879 for (index_t k2=0; k2 < m_NE2; ++k2) {
1880 for (index_t k1=0; k1 < m_NE1; ++k1) {
1881 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1882 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1883 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1884 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1885 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1886 for (index_t i=0; i < numComp; ++i) {
1887 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1888 } /* end of component loop i */
1889 } /* end of k1 loop */
1890 } /* end of k2 loop */
1891 } /* end of face 1 */
1892 if (m_faceOffset[2] > -1) {
1893 #pragma omp for nowait
1894 for (index_t k2=0; k2 < m_NE2; ++k2) {
1895 for (index_t k0=0; k0 < m_NE0; ++k0) {
1896 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1897 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));