/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3759 - (show annotations)
Fri Jan 6 06:54:51 2012 UTC (7 years, 2 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 379221 byte(s)
Implemented ownSample for face elements.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 createPattern();
85 }
86
87
88 Brick::~Brick()
89 {
90 }
91
92 string Brick::getDescription() const
93 {
94 return "ripley::Brick";
95 }
96
97 bool Brick::operator==(const AbstractDomain& other) const
98 {
99 const Brick* o=dynamic_cast<const Brick*>(&other);
100 if (o) {
101 return (RipleyDomain::operator==(other) &&
102 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105 }
106
107 return false;
108 }
109
110 void Brick::dump(const string& fileName) const
111 {
112 #if USE_SILO
113 string fn(fileName);
114 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115 fn+=".silo";
116 }
117
118 const int NUM_SILO_FILES = 1;
119 const char* blockDirFmt = "/block%04d";
120 int driver=DB_HDF5;
121 string siloPath;
122 DBfile* dbfile = NULL;
123
124 #ifdef ESYS_MPI
125 PMPIO_baton_t* baton = NULL;
126 #endif
127
128 if (m_mpiInfo->size > 1) {
129 #ifdef ESYS_MPI
130 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132 PMPIO_DefaultClose, (void*)&driver);
133 // try the fallback driver in case of error
134 if (!baton && driver != DB_PDB) {
135 driver = DB_PDB;
136 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138 PMPIO_DefaultClose, (void*)&driver);
139 }
140 if (baton) {
141 char str[64];
142 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143 siloPath = str;
144 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145 }
146 #endif
147 } else {
148 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149 getDescription().c_str(), driver);
150 // try the fallback driver in case of error
151 if (!dbfile && driver != DB_PDB) {
152 driver = DB_PDB;
153 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154 getDescription().c_str(), driver);
155 }
156 }
157
158 if (!dbfile)
159 throw RipleyException("dump: Could not create Silo file");
160
161 /*
162 if (driver==DB_HDF5) {
163 // gzip level 1 already provides good compression with minimal
164 // performance penalty. Some tests showed that gzip levels >3 performed
165 // rather badly on escript data both in terms of time and space
166 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167 }
168 */
169
170 boost::scoped_ptr<double> x(new double[m_N0]);
171 boost::scoped_ptr<double> y(new double[m_N1]);
172 boost::scoped_ptr<double> z(new double[m_N2]);
173 double* coords[3] = { x.get(), y.get(), z.get() };
174 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175 pair<double,double> ydy = getFirstCoordAndSpacing(1);
176 pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 #pragma omp parallel
178 {
179 #pragma omp for
180 for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 coords[0][i0]=xdx.first+i0*xdx.second;
182 }
183 #pragma omp for
184 for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 coords[1][i1]=ydy.first+i1*ydy.second;
186 }
187 #pragma omp for
188 for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 coords[2][i2]=zdz.first+i2*zdz.second;
190 }
191 }
192 IndexVector dims = getNumNodesPerDim();
193 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 DB_COLLINEAR, NULL);
195
196 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197 NULL, 0, DB_INT, DB_NODECENT, NULL);
198
199 // write element ids
200 dims = getNumElementsPerDim();
201 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203
204 // rank 0 writes multimesh and multivar
205 if (m_mpiInfo->rank == 0) {
206 vector<string> tempstrings;
207 vector<char*> names;
208 for (dim_t i=0; i<m_mpiInfo->size; i++) {
209 stringstream path;
210 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211 tempstrings.push_back(path.str());
212 names.push_back((char*)tempstrings.back().c_str());
213 }
214 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215 DBSetDir(dbfile, "/");
216 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217 &types[0], NULL);
218 tempstrings.clear();
219 names.clear();
220 for (dim_t i=0; i<m_mpiInfo->size; i++) {
221 stringstream path;
222 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223 tempstrings.push_back(path.str());
224 names.push_back((char*)tempstrings.back().c_str());
225 }
226 types.assign(m_mpiInfo->size, DB_QUADVAR);
227 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228 &types[0], NULL);
229 tempstrings.clear();
230 names.clear();
231 for (dim_t i=0; i<m_mpiInfo->size; i++) {
232 stringstream path;
233 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234 tempstrings.push_back(path.str());
235 names.push_back((char*)tempstrings.back().c_str());
236 }
237 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238 &types[0], NULL);
239 }
240
241 if (m_mpiInfo->size > 1) {
242 #ifdef ESYS_MPI
243 PMPIO_HandOffBaton(baton, dbfile);
244 PMPIO_Finish(baton);
245 #endif
246 } else {
247 DBClose(dbfile);
248 }
249
250 #else // USE_SILO
251 throw RipleyException("dump(): no Silo support");
252 #endif
253 }
254
255 const int* Brick::borrowSampleReferenceIDs(int fsType) const
256 {
257 switch (fsType) {
258 case Nodes:
259 case ReducedNodes: //FIXME: reduced
260 return &m_nodeId[0];
261 case DegreesOfFreedom:
262 case ReducedDegreesOfFreedom: //FIXME: reduced
263 return &m_dofId[0];
264 case Elements:
265 case ReducedElements:
266 return &m_elementId[0];
267 case FaceElements:
268 case ReducedFaceElements:
269 return &m_faceId[0];
270 default:
271 break;
272 }
273
274 stringstream msg;
275 msg << "borrowSampleReferenceIDs() not implemented for function space type "
276 << fsType;
277 throw RipleyException(msg.str());
278 }
279
280 bool Brick::ownSample(int fsType, index_t id) const
281 {
282 if (getMPISize()==1)
283 return true;
284
285 switch (fsType) {
286 case Nodes:
287 case ReducedNodes: //FIXME: reduced
288 return (m_dofMap[id] < getNumDOF());
289 case DegreesOfFreedom:
290 case ReducedDegreesOfFreedom:
291 return true;
292 case Elements:
293 case ReducedElements:
294 {
295 // check ownership of element's _last_ node
296 const index_t x=id%m_NE0 + 1;
297 const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
298 const index_t z=id/(m_NE0*m_NE1) + 1;
299 return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
300 }
301 case FaceElements:
302 case ReducedFaceElements:
303 {
304 // check ownership of face element's last node
305 const IndexVector faces = getNumFacesPerBoundary();
306 dim_t n=0;
307 for (size_t i=0; i<faces.size(); i++) {
308 n+=faces[i];
309 if (id<n) {
310 const index_t j=id-n+faces[i];
311 if (i>=4) { // front or back
312 const index_t first=(i==4 ? 0 : m_N0*m_N1*(m_N2-1));
313 return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0] < getNumDOF());
314 } else if (i>=2) { // bottom or top
315 const index_t first=(i==2 ? 0 : m_N0*(m_N1-1));
316 return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0*m_N1] < getNumDOF());
317 } else { // left or right
318 const index_t first=(i==0 ? 0 : m_N0-1);
319 return (m_dofMap[first+(j%m_NE1+1)*m_N0+(j/m_NE1+1)*m_N0*m_N1] < getNumDOF());
320 }
321 }
322 }
323 return false;
324 }
325 default:
326 break;
327 }
328
329 stringstream msg;
330 msg << "ownSample() not implemented for "
331 << functionSpaceTypeAsString(fsType);
332 throw RipleyException(msg.str());
333 }
334
335 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
336 {
337 escript::Data& in = *const_cast<escript::Data*>(&cIn);
338 const dim_t numComp = in.getDataPointSize();
339 const double h0 = m_l0/m_gNE0;
340 const double h1 = m_l1/m_gNE1;
341 const double h2 = m_l1/m_gNE2;
342 const double C0 = .044658198738520451079;
343 const double C1 = .16666666666666666667;
344 const double C2 = .21132486540518711775;
345 const double C3 = .25;
346 const double C4 = .5;
347 const double C5 = .62200846792814621559;
348 const double C6 = .78867513459481288225;
349
350 if (out.getFunctionSpace().getTypeCode() == Elements) {
351 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
352 #pragma omp parallel for
353 for (index_t k2=0; k2 < m_NE2; ++k2) {
354 for (index_t k1=0; k1 < m_NE1; ++k1) {
355 for (index_t k0=0; k0 < m_NE0; ++k0) {
356 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
357 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
358 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
359 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
360 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
361 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
362 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
363 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
364 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
365 for (index_t i=0; i < numComp; ++i) {
366 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
367 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
368 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
369 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
370 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
371 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
372 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
373 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
374 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
375 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
376 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
377 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
378 o[INDEX3(i,0,0,numComp,3)] = V0;
379 o[INDEX3(i,1,0,numComp,3)] = V4;
380 o[INDEX3(i,2,0,numComp,3)] = V8;
381 o[INDEX3(i,0,1,numComp,3)] = V0;
382 o[INDEX3(i,1,1,numComp,3)] = V5;
383 o[INDEX3(i,2,1,numComp,3)] = V9;
384 o[INDEX3(i,0,2,numComp,3)] = V1;
385 o[INDEX3(i,1,2,numComp,3)] = V4;
386 o[INDEX3(i,2,2,numComp,3)] = V10;
387 o[INDEX3(i,0,3,numComp,3)] = V1;
388 o[INDEX3(i,1,3,numComp,3)] = V5;
389 o[INDEX3(i,2,3,numComp,3)] = V11;
390 o[INDEX3(i,0,4,numComp,3)] = V2;
391 o[INDEX3(i,1,4,numComp,3)] = V6;
392 o[INDEX3(i,2,4,numComp,3)] = V8;
393 o[INDEX3(i,0,5,numComp,3)] = V2;
394 o[INDEX3(i,1,5,numComp,3)] = V7;
395 o[INDEX3(i,2,5,numComp,3)] = V9;
396 o[INDEX3(i,0,6,numComp,3)] = V3;
397 o[INDEX3(i,1,6,numComp,3)] = V6;
398 o[INDEX3(i,2,6,numComp,3)] = V10;
399 o[INDEX3(i,0,7,numComp,3)] = V3;
400 o[INDEX3(i,1,7,numComp,3)] = V7;
401 o[INDEX3(i,2,7,numComp,3)] = V11;
402 } /* end of component loop i */
403 } /* end of k0 loop */
404 } /* end of k1 loop */
405 } /* end of k2 loop */
406 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
407 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
408 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
409 #pragma omp parallel for
410 for (index_t k2=0; k2 < m_NE2; ++k2) {
411 for (index_t k1=0; k1 < m_NE1; ++k1) {
412 for (index_t k0=0; k0 < m_NE0; ++k0) {
413 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
414 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
415 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
416 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
417 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
418 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
419 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
420 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
421 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
422 for (index_t i=0; i < numComp; ++i) {
423 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
424 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
425 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
426 } /* end of component loop i */
427 } /* end of k0 loop */
428 } /* end of k1 loop */
429 } /* end of k2 loop */
430 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
431 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
432 /*** GENERATOR SNIP_GRAD_FACES TOP */
433 #pragma omp parallel
434 {
435 if (m_faceOffset[0] > -1) {
436 #pragma omp for nowait
437 for (index_t k2=0; k2 < m_NE2; ++k2) {
438 for (index_t k1=0; k1 < m_NE1; ++k1) {
439 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
440 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
441 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
442 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
443 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
444 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
445 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
446 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
447 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
448 for (index_t i=0; i < numComp; ++i) {
449 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
450 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
451 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
452 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
453 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
454 o[INDEX3(i,1,0,numComp,3)] = V0;
455 o[INDEX3(i,2,0,numComp,3)] = V2;
456 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
457 o[INDEX3(i,1,1,numComp,3)] = V0;
458 o[INDEX3(i,2,1,numComp,3)] = V3;
459 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460 o[INDEX3(i,1,2,numComp,3)] = V1;
461 o[INDEX3(i,2,2,numComp,3)] = V2;
462 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
463 o[INDEX3(i,1,3,numComp,3)] = V1;
464 o[INDEX3(i,2,3,numComp,3)] = V3;
465 } /* end of component loop i */
466 } /* end of k1 loop */
467 } /* end of k2 loop */
468 } /* end of face 0 */
469 if (m_faceOffset[1] > -1) {
470 #pragma omp for nowait
471 for (index_t k2=0; k2 < m_NE2; ++k2) {
472 for (index_t k1=0; k1 < m_NE1; ++k1) {
473 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
474 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
475 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
476 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
477 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
478 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
479 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
480 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
481 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
482 for (index_t i=0; i < numComp; ++i) {
483 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
484 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
485 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
486 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
487 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
488 o[INDEX3(i,1,0,numComp,3)] = V0;
489 o[INDEX3(i,2,0,numComp,3)] = V2;
490 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
491 o[INDEX3(i,1,1,numComp,3)] = V0;
492 o[INDEX3(i,2,1,numComp,3)] = V3;
493 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
494 o[INDEX3(i,1,2,numComp,3)] = V1;
495 o[INDEX3(i,2,2,numComp,3)] = V2;
496 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
497 o[INDEX3(i,1,3,numComp,3)] = V1;
498 o[INDEX3(i,2,3,numComp,3)] = V3;
499 } /* end of component loop i */
500 } /* end of k1 loop */
501 } /* end of k2 loop */
502 } /* end of face 1 */
503 if (m_faceOffset[2] > -1) {
504 #pragma omp for nowait
505 for (index_t k2=0; k2 < m_NE2; ++k2) {
506 for (index_t k0=0; k0 < m_NE0; ++k0) {
507 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
508 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
509 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
510 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
511 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
512 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
513 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
514 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
515 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
516 for (index_t i=0; i < numComp; ++i) {
517 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
518 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
519 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
520 o[INDEX3(i,0,0,numComp,3)] = V0;
521 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
522 o[INDEX3(i,2,0,numComp,3)] = V1;
523 o[INDEX3(i,0,1,numComp,3)] = V0;
524 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
525 o[INDEX3(i,2,1,numComp,3)] = V2;
526 o[INDEX3(i,0,2,numComp,3)] = V0;
527 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528 o[INDEX3(i,2,2,numComp,3)] = V1;
529 o[INDEX3(i,0,3,numComp,3)] = V0;
530 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
531 o[INDEX3(i,2,3,numComp,3)] = V2;
532 } /* end of component loop i */
533 } /* end of k0 loop */
534 } /* end of k2 loop */
535 } /* end of face 2 */
536 if (m_faceOffset[3] > -1) {
537 #pragma omp for nowait
538 for (index_t k2=0; k2 < m_NE2; ++k2) {
539 for (index_t k0=0; k0 < m_NE0; ++k0) {
540 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
541 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
542 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
543 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
544 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
545 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
546 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
547 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
548 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
549 for (index_t i=0; i < numComp; ++i) {
550 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
551 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
552 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
553 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
554 o[INDEX3(i,0,0,numComp,3)] = V0;
555 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
556 o[INDEX3(i,2,0,numComp,3)] = V2;
557 o[INDEX3(i,0,1,numComp,3)] = V0;
558 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
559 o[INDEX3(i,2,1,numComp,3)] = V3;
560 o[INDEX3(i,0,2,numComp,3)] = V1;
561 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
562 o[INDEX3(i,2,2,numComp,3)] = V2;
563 o[INDEX3(i,0,3,numComp,3)] = V1;
564 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
565 o[INDEX3(i,2,3,numComp,3)] = V3;
566 } /* end of component loop i */
567 } /* end of k0 loop */
568 } /* end of k2 loop */
569 } /* end of face 3 */
570 if (m_faceOffset[4] > -1) {
571 #pragma omp for nowait
572 for (index_t k1=0; k1 < m_NE1; ++k1) {
573 for (index_t k0=0; k0 < m_NE0; ++k0) {
574 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
575 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
576 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
577 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
578 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
579 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
580 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
581 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
582 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
583 for (index_t i=0; i < numComp; ++i) {
584 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
585 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
586 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
587 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
588 o[INDEX3(i,0,0,numComp,3)] = V0;
589 o[INDEX3(i,1,0,numComp,3)] = V2;
590 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
591 o[INDEX3(i,0,1,numComp,3)] = V0;
592 o[INDEX3(i,1,1,numComp,3)] = V3;
593 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
594 o[INDEX3(i,0,2,numComp,3)] = V1;
595 o[INDEX3(i,1,2,numComp,3)] = V2;
596 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597 o[INDEX3(i,0,3,numComp,3)] = V1;
598 o[INDEX3(i,1,3,numComp,3)] = V3;
599 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
600 } /* end of component loop i */
601 } /* end of k0 loop */
602 } /* end of k1 loop */
603 } /* end of face 4 */
604 if (m_faceOffset[5] > -1) {
605 #pragma omp for nowait
606 for (index_t k1=0; k1 < m_NE1; ++k1) {
607 for (index_t k0=0; k0 < m_NE0; ++k0) {
608 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
609 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
610 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
611 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
612 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
613 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
614 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
615 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
616 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
617 for (index_t i=0; i < numComp; ++i) {
618 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
619 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
620 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
621 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
622 o[INDEX3(i,0,0,numComp,3)] = V0;
623 o[INDEX3(i,1,0,numComp,3)] = V2;
624 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
625 o[INDEX3(i,0,1,numComp,3)] = V0;
626 o[INDEX3(i,1,1,numComp,3)] = V3;
627 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
628 o[INDEX3(i,0,2,numComp,3)] = V1;
629 o[INDEX3(i,1,2,numComp,3)] = V2;
630 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
631 o[INDEX3(i,0,3,numComp,3)] = V1;
632 o[INDEX3(i,1,3,numComp,3)] = V3;
633 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
634 } /* end of component loop i */
635 } /* end of k0 loop */
636 } /* end of k1 loop */
637 } /* end of face 5 */
638 } // end of parallel section
639 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
640 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
641 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
642 #pragma omp parallel
643 {
644 if (m_faceOffset[0] > -1) {
645 #pragma omp for nowait
646 for (index_t k2=0; k2 < m_NE2; ++k2) {
647 for (index_t k1=0; k1 < m_NE1; ++k1) {
648 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
649 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
650 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
651 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
652 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
653 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
654 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
655 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
656 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
657 for (index_t i=0; i < numComp; ++i) {
658 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
659 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
660 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
661 } /* end of component loop i */
662 } /* end of k1 loop */
663 } /* end of k2 loop */
664 } /* end of face 0 */
665 if (m_faceOffset[1] > -1) {
666 #pragma omp for nowait
667 for (index_t k2=0; k2 < m_NE2; ++k2) {
668 for (index_t k1=0; k1 < m_NE1; ++k1) {
669 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
670 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
671 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
672 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
673 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
674 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
675 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
676 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
677 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
678 for (index_t i=0; i < numComp; ++i) {
679 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
680 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
681 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
682 } /* end of component loop i */
683 } /* end of k1 loop */
684 } /* end of k2 loop */
685 } /* end of face 1 */
686 if (m_faceOffset[2] > -1) {
687 #pragma omp for nowait
688 for (index_t k2=0; k2 < m_NE2; ++k2) {
689 for (index_t k0=0; k0 < m_NE0; ++k0) {
690 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
691 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
692 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
693 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
694 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
695 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
696 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
697 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
698 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
699 for (index_t i=0; i < numComp; ++i) {
700 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
701 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
702 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
703 } /* end of component loop i */
704 } /* end of k0 loop */
705 } /* end of k2 loop */
706 } /* end of face 2 */
707 if (m_faceOffset[3] > -1) {
708 #pragma omp for nowait
709 for (index_t k2=0; k2 < m_NE2; ++k2) {
710 for (index_t k0=0; k0 < m_NE0; ++k0) {
711 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
712 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
713 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
714 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
715 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
716 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
717 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
718 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
719 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
720 for (index_t i=0; i < numComp; ++i) {
721 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
722 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
723 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
724 } /* end of component loop i */
725 } /* end of k0 loop */
726 } /* end of k2 loop */
727 } /* end of face 3 */
728 if (m_faceOffset[4] > -1) {
729 #pragma omp for nowait
730 for (index_t k1=0; k1 < m_NE1; ++k1) {
731 for (index_t k0=0; k0 < m_NE0; ++k0) {
732 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
733 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
734 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
735 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
736 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
737 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
738 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
739 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
740 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
741 for (index_t i=0; i < numComp; ++i) {
742 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
743 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
744 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
745 } /* end of component loop i */
746 } /* end of k0 loop */
747 } /* end of k1 loop */
748 } /* end of face 4 */
749 if (m_faceOffset[5] > -1) {
750 #pragma omp for nowait
751 for (index_t k1=0; k1 < m_NE1; ++k1) {
752 for (index_t k0=0; k0 < m_NE0; ++k0) {
753 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
754 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
755 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
756 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
757 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
758 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
759 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
760 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
761 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
762 for (index_t i=0; i < numComp; ++i) {
763 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
764 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
765 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
766 } /* end of component loop i */
767 } /* end of k0 loop */
768 } /* end of k1 loop */
769 } /* end of face 5 */
770 } // end of parallel section
771 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
772 } else {
773 stringstream msg;
774 msg << "setToGradient() not implemented for "
775 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
776 throw RipleyException(msg.str());
777 }
778 }
779
780 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
781 {
782 escript::Data& in = *const_cast<escript::Data*>(&arg);
783 const dim_t numComp = in.getDataPointSize();
784 const double h0 = m_l0/m_gNE0;
785 const double h1 = m_l1/m_gNE1;
786 const double h2 = m_l2/m_gNE2;
787 if (arg.getFunctionSpace().getTypeCode() == Elements) {
788 const double w_0 = h0*h1*h2/8.;
789 #pragma omp parallel
790 {
791 vector<double> int_local(numComp, 0);
792 #pragma omp for nowait
793 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
794 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
795 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
796 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
797 for (index_t i=0; i < numComp; ++i) {
798 const register double f_0 = f[INDEX2(i,0,numComp)];
799 const register double f_1 = f[INDEX2(i,1,numComp)];
800 const register double f_2 = f[INDEX2(i,2,numComp)];
801 const register double f_3 = f[INDEX2(i,3,numComp)];
802 const register double f_4 = f[INDEX2(i,4,numComp)];
803 const register double f_5 = f[INDEX2(i,5,numComp)];
804 const register double f_6 = f[INDEX2(i,6,numComp)];
805 const register double f_7 = f[INDEX2(i,7,numComp)];
806 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
807 } /* end of component loop i */
808 } /* end of k0 loop */
809 } /* end of k1 loop */
810 } /* end of k2 loop */
811
812 #pragma omp critical
813 for (index_t i=0; i<numComp; i++)
814 integrals[i]+=int_local[i];
815 } // end of parallel section
816 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
817 const double w_0 = h0*h1*h2;
818 #pragma omp parallel
819 {
820 vector<double> int_local(numComp, 0);
821 #pragma omp for nowait
822 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
823 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
824 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
825 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
826 for (index_t i=0; i < numComp; ++i) {
827 int_local[i]+=f[i]*w_0;
828 } /* end of component loop i */
829 } /* end of k0 loop */
830 } /* end of k1 loop */
831 } /* end of k2 loop */
832
833 #pragma omp critical
834 for (index_t i=0; i<numComp; i++)
835 integrals[i]+=int_local[i];
836 } // end of parallel section
837 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
838 const double w_0 = h1*h2/4.;
839 const double w_1 = h0*h2/4.;
840 const double w_2 = h0*h1/4.;
841 #pragma omp parallel
842 {
843 vector<double> int_local(numComp, 0);
844 if (m_faceOffset[0] > -1) {
845 #pragma omp for nowait
846 for (index_t k2=0; k2 < m_NE2; ++k2) {
847 for (index_t k1=0; k1 < m_NE1; ++k1) {
848 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
849 for (index_t i=0; i < numComp; ++i) {
850 const register double f_0 = f[INDEX2(i,0,numComp)];
851 const register double f_1 = f[INDEX2(i,1,numComp)];
852 const register double f_2 = f[INDEX2(i,2,numComp)];
853 const register double f_3 = f[INDEX2(i,3,numComp)];
854 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
855 } /* end of component loop i */
856 } /* end of k1 loop */
857 } /* end of k2 loop */
858 }
859
860 if (m_faceOffset[1] > -1) {
861 #pragma omp for nowait
862 for (index_t k2=0; k2 < m_NE2; ++k2) {
863 for (index_t k1=0; k1 < m_NE1; ++k1) {
864 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
865 for (index_t i=0; i < numComp; ++i) {
866 const register double f_0 = f[INDEX2(i,0,numComp)];
867 const register double f_1 = f[INDEX2(i,1,numComp)];
868 const register double f_2 = f[INDEX2(i,2,numComp)];
869 const register double f_3 = f[INDEX2(i,3,numComp)];
870 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
871 } /* end of component loop i */
872 } /* end of k1 loop */
873 } /* end of k2 loop */
874 }
875
876 if (m_faceOffset[2] > -1) {
877 #pragma omp for nowait
878 for (index_t k2=0; k2 < m_NE2; ++k2) {
879 for (index_t k0=0; k0 < m_NE0; ++k0) {
880 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
881 for (index_t i=0; i < numComp; ++i) {
882 const register double f_0 = f[INDEX2(i,0,numComp)];
883 const register double f_1 = f[INDEX2(i,1,numComp)];
884 const register double f_2 = f[INDEX2(i,2,numComp)];
885 const register double f_3 = f[INDEX2(i,3,numComp)];
886 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
887 } /* end of component loop i */
888 } /* end of k1 loop */
889 } /* end of k2 loop */
890 }
891
892 if (m_faceOffset[3] > -1) {
893 #pragma omp for nowait
894 for (index_t k2=0; k2 < m_NE2; ++k2) {
895 for (index_t k0=0; k0 < m_NE0; ++k0) {
896 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
897 for (index_t i=0; i < numComp; ++i) {
898 const register double f_0 = f[INDEX2(i,0,numComp)];
899 const register double f_1 = f[INDEX2(i,1,numComp)];
900 const register double f_2 = f[INDEX2(i,2,numComp)];
901 const register double f_3 = f[INDEX2(i,3,numComp)];
902 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
903 } /* end of component loop i */
904 } /* end of k1 loop */
905 } /* end of k2 loop */
906 }
907
908 if (m_faceOffset[4] > -1) {
909 #pragma omp for nowait
910 for (index_t k1=0; k1 < m_NE1; ++k1) {
911 for (index_t k0=0; k0 < m_NE0; ++k0) {
912 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
913 for (index_t i=0; i < numComp; ++i) {
914 const register double f_0 = f[INDEX2(i,0,numComp)];
915 const register double f_1 = f[INDEX2(i,1,numComp)];
916 const register double f_2 = f[INDEX2(i,2,numComp)];
917 const register double f_3 = f[INDEX2(i,3,numComp)];
918 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
919 } /* end of component loop i */
920 } /* end of k1 loop */
921 } /* end of k2 loop */
922 }
923
924 if (m_faceOffset[5] > -1) {
925 #pragma omp for nowait
926 for (index_t k1=0; k1 < m_NE1; ++k1) {
927 for (index_t k0=0; k0 < m_NE0; ++k0) {
928 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
929 for (index_t i=0; i < numComp; ++i) {
930 const register double f_0 = f[INDEX2(i,0,numComp)];
931 const register double f_1 = f[INDEX2(i,1,numComp)];
932 const register double f_2 = f[INDEX2(i,2,numComp)];
933 const register double f_3 = f[INDEX2(i,3,numComp)];
934 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
935 } /* end of component loop i */
936 } /* end of k1 loop */
937 } /* end of k2 loop */
938 }
939
940 #pragma omp critical
941 for (index_t i=0; i<numComp; i++)
942 integrals[i]+=int_local[i];
943 } // end of parallel section
944
945 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
946 const double w_0 = h1*h2;
947 const double w_1 = h0*h2;
948 const double w_2 = h0*h1;
949 #pragma omp parallel
950 {
951 vector<double> int_local(numComp, 0);
952 if (m_faceOffset[0] > -1) {
953 #pragma omp for nowait
954 for (index_t k2=0; k2 < m_NE2; ++k2) {
955 for (index_t k1=0; k1 < m_NE1; ++k1) {
956 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
957 for (index_t i=0; i < numComp; ++i) {
958 int_local[i]+=f[i]*w_0;
959 } /* end of component loop i */
960 } /* end of k1 loop */
961 } /* end of k2 loop */
962 }
963
964 if (m_faceOffset[1] > -1) {
965 #pragma omp for nowait
966 for (index_t k2=0; k2 < m_NE2; ++k2) {
967 for (index_t k1=0; k1 < m_NE1; ++k1) {
968 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
969 for (index_t i=0; i < numComp; ++i) {
970 int_local[i]+=f[i]*w_0;
971 } /* end of component loop i */
972 } /* end of k1 loop */
973 } /* end of k2 loop */
974 }
975
976 if (m_faceOffset[2] > -1) {
977 #pragma omp for nowait
978 for (index_t k2=0; k2 < m_NE2; ++k2) {
979 for (index_t k0=0; k0 < m_NE0; ++k0) {
980 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
981 for (index_t i=0; i < numComp; ++i) {
982 int_local[i]+=f[i]*w_1;
983 } /* end of component loop i */
984 } /* end of k1 loop */
985 } /* end of k2 loop */
986 }
987
988 if (m_faceOffset[3] > -1) {
989 #pragma omp for nowait
990 for (index_t k2=0; k2 < m_NE2; ++k2) {
991 for (index_t k0=0; k0 < m_NE0; ++k0) {
992 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
993 for (index_t i=0; i < numComp; ++i) {
994 int_local[i]+=f[i]*w_1;
995 } /* end of component loop i */
996 } /* end of k1 loop */
997 } /* end of k2 loop */
998 }
999
1000 if (m_faceOffset[4] > -1) {
1001 #pragma omp for nowait
1002 for (index_t k1=0; k1 < m_NE1; ++k1) {
1003 for (index_t k0=0; k0 < m_NE0; ++k0) {
1004 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1005 for (index_t i=0; i < numComp; ++i) {
1006 int_local[i]+=f[i]*w_2;
1007 } /* end of component loop i */
1008 } /* end of k1 loop */
1009 } /* end of k2 loop */
1010 }
1011
1012 if (m_faceOffset[5] > -1) {
1013 #pragma omp for nowait
1014 for (index_t k1=0; k1 < m_NE1; ++k1) {
1015 for (index_t k0=0; k0 < m_NE0; ++k0) {
1016 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1017 for (index_t i=0; i < numComp; ++i) {
1018 int_local[i]+=f[i]*w_2;
1019 } /* end of component loop i */
1020 } /* end of k1 loop */
1021 } /* end of k2 loop */
1022 }
1023
1024 #pragma omp critical
1025 for (index_t i=0; i<numComp; i++)
1026 integrals[i]+=int_local[i];
1027 } // end of parallel section
1028
1029 } else {
1030 stringstream msg;
1031 msg << "setToIntegrals() not implemented for "
1032 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1033 throw RipleyException(msg.str());
1034 }
1035 }
1036
1037 void Brick::setToNormal(escript::Data& out) const
1038 {
1039 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1040 #pragma omp parallel
1041 {
1042 if (m_faceOffset[0] > -1) {
1043 #pragma omp for nowait
1044 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1045 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1046 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1047 // set vector at four quadrature points
1048 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1049 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1050 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1051 *o++ = -1.; *o++ = 0.; *o = 0.;
1052 }
1053 }
1054 }
1055
1056 if (m_faceOffset[1] > -1) {
1057 #pragma omp for nowait
1058 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1059 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1060 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1061 // set vector at four quadrature points
1062 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1063 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1064 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1065 *o++ = 1.; *o++ = 0.; *o = 0.;
1066 }
1067 }
1068 }
1069
1070 if (m_faceOffset[2] > -1) {
1071 #pragma omp for nowait
1072 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1073 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1074 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1075 // set vector at four quadrature points
1076 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1077 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1078 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1079 *o++ = 0.; *o++ = -1.; *o = 0.;
1080 }
1081 }
1082 }
1083
1084 if (m_faceOffset[3] > -1) {
1085 #pragma omp for nowait
1086 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1087 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1088 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1089 // set vector at four quadrature points
1090 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1091 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1092 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1093 *o++ = 0.; *o++ = 1.; *o = 0.;
1094 }
1095 }
1096 }
1097
1098 if (m_faceOffset[4] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1101 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1102 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1103 // set vector at four quadrature points
1104 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1105 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1106 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1107 *o++ = 0.; *o++ = 0.; *o = -1.;
1108 }
1109 }
1110 }
1111
1112 if (m_faceOffset[5] > -1) {
1113 #pragma omp for nowait
1114 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1115 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1116 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1117 // set vector at four quadrature points
1118 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1119 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1120 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1121 *o++ = 0.; *o++ = 0.; *o = 1.;
1122 }
1123 }
1124 }
1125 } // end of parallel section
1126 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1127 #pragma omp parallel
1128 {
1129 if (m_faceOffset[0] > -1) {
1130 #pragma omp for nowait
1131 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1132 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1133 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1134 *o++ = -1.;
1135 *o++ = 0.;
1136 *o = 0.;
1137 }
1138 }
1139 }
1140
1141 if (m_faceOffset[1] > -1) {
1142 #pragma omp for nowait
1143 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1144 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1145 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1146 *o++ = 1.;
1147 *o++ = 0.;
1148 *o = 0.;
1149 }
1150 }
1151 }
1152
1153 if (m_faceOffset[2] > -1) {
1154 #pragma omp for nowait
1155 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1156 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1157 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1158 *o++ = 0.;
1159 *o++ = -1.;
1160 *o = 0.;
1161 }
1162 }
1163 }
1164
1165 if (m_faceOffset[3] > -1) {
1166 #pragma omp for nowait
1167 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1168 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1169 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1170 *o++ = 0.;
1171 *o++ = 1.;
1172 *o = 0.;
1173 }
1174 }
1175 }
1176
1177 if (m_faceOffset[4] > -1) {
1178 #pragma omp for nowait
1179 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1180 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1181 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1182 *o++ = 0.;
1183 *o++ = 0.;
1184 *o = -1.;
1185 }
1186 }
1187 }
1188
1189 if (m_faceOffset[5] > -1) {
1190 #pragma omp for nowait
1191 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1192 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1193 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1194 *o++ = 0.;
1195 *o++ = 0.;
1196 *o = 1.;
1197 }
1198 }
1199 }
1200 } // end of parallel section
1201
1202 } else {
1203 stringstream msg;
1204 msg << "setToNormal() not implemented for "
1205 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1206 throw RipleyException(msg.str());
1207 }
1208 }
1209
1210 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1211 bool reducedColOrder) const
1212 {
1213 if (reducedRowOrder || reducedColOrder)
1214 throw RipleyException("getPattern() not implemented for reduced order");
1215
1216 return m_pattern;
1217 }
1218
1219 void Brick::Print_Mesh_Info(const bool full) const
1220 {
1221 RipleyDomain::Print_Mesh_Info(full);
1222 if (full) {
1223 cout << " Id Coordinates" << endl;
1224 cout.precision(15);
1225 cout.setf(ios::scientific, ios::floatfield);
1226 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1227 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1228 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1229 for (index_t i=0; i < getNumNodes(); i++) {
1230 cout << " " << setw(5) << m_nodeId[i]
1231 << " " << xdx.first+(i%m_N0)*xdx.second
1232 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1233 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1234 }
1235 }
1236 }
1237
1238 IndexVector Brick::getNumNodesPerDim() const
1239 {
1240 IndexVector ret;
1241 ret.push_back(m_N0);
1242 ret.push_back(m_N1);
1243 ret.push_back(m_N2);
1244 return ret;
1245 }
1246
1247 IndexVector Brick::getNumElementsPerDim() const
1248 {
1249 IndexVector ret;
1250 ret.push_back(m_NE0);
1251 ret.push_back(m_NE1);
1252 ret.push_back(m_NE2);
1253 return ret;
1254 }
1255
1256 IndexVector Brick::getNumFacesPerBoundary() const
1257 {
1258 IndexVector ret(6, 0);
1259 //left
1260 if (m_offset0==0)
1261 ret[0]=m_NE1*m_NE2;
1262 //right
1263 if (m_mpiInfo->rank%m_NX==m_NX-1)
1264 ret[1]=m_NE1*m_NE2;
1265 //bottom
1266 if (m_offset1==0)
1267 ret[2]=m_NE0*m_NE2;
1268 //top
1269 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1270 ret[3]=m_NE0*m_NE2;
1271 //front
1272 if (m_offset2==0)
1273 ret[4]=m_NE0*m_NE1;
1274 //back
1275 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1276 ret[5]=m_NE0*m_NE1;
1277 return ret;
1278 }
1279
1280 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1281 {
1282 if (dim==0)
1283 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1284 else if (dim==1)
1285 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1286 else if (dim==2)
1287 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1288
1289 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1290 }
1291
1292 //protected
1293 dim_t Brick::getNumDOF() const
1294 {
1295 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1296 }
1297
1298 //protected
1299 dim_t Brick::getNumFaceElements() const
1300 {
1301 const IndexVector faces = getNumFacesPerBoundary();
1302 dim_t n=0;
1303 for (size_t i=0; i<faces.size(); i++)
1304 n+=faces[i];
1305 return n;
1306 }
1307
1308 //protected
1309 void Brick::assembleCoordinates(escript::Data& arg) const
1310 {
1311 escriptDataC x = arg.getDataC();
1312 int numDim = m_numDim;
1313 if (!isDataPointShapeEqual(&x, 1, &numDim))
1314 throw RipleyException("setToX: Invalid Data object shape");
1315 if (!numSamplesEqual(&x, 1, getNumNodes()))
1316 throw RipleyException("setToX: Illegal number of samples in Data object");
1317
1318 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1319 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1320 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1321 arg.requireWrite();
1322 #pragma omp parallel for
1323 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1324 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1325 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1326 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1327 point[0] = xdx.first+i0*xdx.second;
1328 point[1] = ydy.first+i1*ydy.second;
1329 point[2] = zdz.first+i2*zdz.second;
1330 }
1331 }
1332 }
1333 }
1334
1335 //protected
1336 dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1337 {
1338 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1339 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1340 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1341 const int x=node%nDOF0;
1342 const int y=node%(nDOF0*nDOF1)/nDOF0;
1343 const int z=node/(nDOF0*nDOF1);
1344 int num=0;
1345 // loop through potential neighbours and add to index if positions are
1346 // within bounds
1347 for (int i2=-1; i2<2; i2++) {
1348 for (int i1=-1; i1<2; i1++) {
1349 for (int i0=-1; i0<2; i0++) {
1350 // skip node itself
1351 if (i0==0 && i1==0 && i2==0)
1352 continue;
1353 // location of neighbour node
1354 const int nx=x+i0;
1355 const int ny=y+i1;
1356 const int nz=z+i2;
1357 if (nx>=0 && ny>=0 && nz>=0
1358 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1359 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1360 num++;
1361 }
1362 }
1363 }
1364 }
1365
1366 return num;
1367 }
1368
1369 //protected
1370 void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1371 {
1372 const dim_t numComp = in.getDataPointSize();
1373 out.requireWrite();
1374
1375 const index_t left = (m_offset0==0 ? 0 : 1);
1376 const index_t bottom = (m_offset1==0 ? 0 : 1);
1377 const index_t front = (m_offset2==0 ? 0 : 1);
1378 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1379 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1380 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1381 #pragma omp parallel for
1382 for (index_t i=0; i<nDOF2; i++) {
1383 for (index_t j=0; j<nDOF1; j++) {
1384 for (index_t k=0; k<nDOF0; k++) {
1385 const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1386 const double* src=in.getSampleDataRO(n);
1387 copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1388 }
1389 }
1390 }
1391 }
1392
1393 //protected
1394 void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1395 {
1396 const dim_t numComp = in.getDataPointSize();
1397 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1398 in.requireWrite();
1399 Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1400
1401 const dim_t numDOF = getNumDOF();
1402 out.requireWrite();
1403 const double* buffer = Paso_Coupler_finishCollect(coupler);
1404
1405 #pragma omp parallel for
1406 for (index_t i=0; i<getNumNodes(); i++) {
1407 const double* src=(m_dofMap[i]<numDOF ?
1408 in.getSampleDataRO(m_dofMap[i])
1409 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1410 copy(src, src+numComp, out.getSampleDataRW(i));
1411 }
1412 }
1413
1414 //private
1415 void Brick::populateSampleIds()
1416 {
1417 // identifiers are ordered from left to right, bottom to top, front to back
1418 // globally
1419
1420 // build node distribution vector first.
1421 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1422 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1423 const dim_t numDOF=getNumDOF();
1424 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1425 m_nodeDistribution[k]=k*numDOF;
1426 }
1427 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1428 m_nodeId.resize(getNumNodes());
1429 m_dofId.resize(numDOF);
1430 m_elementId.resize(getNumElements());
1431 m_faceId.resize(getNumFaceElements());
1432
1433 #pragma omp parallel
1434 {
1435 #pragma omp for nowait
1436 // nodes
1437 for (dim_t i2=0; i2<m_N2; i2++) {
1438 for (dim_t i1=0; i1<m_N1; i1++) {
1439 for (dim_t i0=0; i0<m_N0; i0++) {
1440 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1441 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1442 +(m_offset1+i1)*(m_gNE0+1)
1443 +m_offset0+i0;
1444 }
1445 }
1446 }
1447
1448 // degrees of freedom
1449 #pragma omp for nowait
1450 for (dim_t k=0; k<numDOF; k++)
1451 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1452
1453 // elements
1454 #pragma omp for nowait
1455 for (dim_t i2=0; i2<m_NE2; i2++) {
1456 for (dim_t i1=0; i1<m_NE1; i1++) {
1457 for (dim_t i0=0; i0<m_NE0; i0++) {
1458 m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1459 (m_offset2+i2)*m_gNE0*m_gNE1
1460 +(m_offset1+i1)*m_gNE0
1461 +m_offset0+i0;
1462 }
1463 }
1464 }
1465
1466 // face elements
1467 #pragma omp for
1468 for (dim_t k=0; k<getNumFaceElements(); k++)
1469 m_faceId[k]=k;
1470 } // end parallel section
1471
1472 m_nodeTags.assign(getNumNodes(), 0);
1473 updateTagsInUse(Nodes);
1474
1475 m_elementTags.assign(getNumElements(), 0);
1476 updateTagsInUse(Elements);
1477
1478 // generate face offset vector and set face tags
1479 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1480 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1481 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1482 m_faceOffset.assign(facesPerEdge.size(), -1);
1483 m_faceTags.clear();
1484 index_t offset=0;
1485 for (size_t i=0; i<facesPerEdge.size(); i++) {
1486 if (facesPerEdge[i]>0) {
1487 m_faceOffset[i]=offset;
1488 offset+=facesPerEdge[i];
1489 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1490 }
1491 }
1492 setTagMap("left", LEFT);
1493 setTagMap("right", RIGHT);
1494 setTagMap("bottom", BOTTOM);
1495 setTagMap("top", TOP);
1496 setTagMap("front", FRONT);
1497 setTagMap("back", BACK);
1498 updateTagsInUse(FaceElements);
1499 }
1500
1501 //private
1502 void Brick::createPattern()
1503 {
1504 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1505 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1506 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1507 const index_t left = (m_offset0==0 ? 0 : 1);
1508 const index_t bottom = (m_offset1==0 ? 0 : 1);
1509 const index_t front = (m_offset2==0 ? 0 : 1);
1510
1511 // populate node->DOF mapping with own degrees of freedom.
1512 // The rest is assigned in the loop further down
1513 m_dofMap.assign(getNumNodes(), 0);
1514 #pragma omp parallel for
1515 for (index_t i=front; i<m_N2; i++) {
1516 for (index_t j=bottom; j<m_N1; j++) {
1517 for (index_t k=left; k<m_N0; k++) {
1518 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1519 }
1520 }
1521 }
1522
1523 // build list of shared components and neighbours by looping through
1524 // all potential neighbouring ranks and checking if positions are
1525 // within bounds
1526 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1527 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1528 RankVector neighbour;
1529 IndexVector offsetInShared(1,0);
1530 IndexVector sendShared, recvShared;
1531 int numShared=0;
1532 const int x=m_mpiInfo->rank%m_NX;
1533 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1534 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1535 for (int i2=-1; i2<2; i2++) {
1536 for (int i1=-1; i1<2; i1++) {
1537 for (int i0=-1; i0<2; i0++) {
1538 // skip this rank
1539 if (i0==0 && i1==0 && i2==0)
1540 continue;
1541 // location of neighbour rank
1542 const int nx=x+i0;
1543 const int ny=y+i1;
1544 const int nz=z+i2;
1545 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1546 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1547 if (i0==0 && i1==0) {
1548 // sharing front or back plane
1549 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1550 for (dim_t i=0; i<nDOF1; i++) {
1551 const int firstDOF=(i2==-1 ? i*nDOF0
1552 : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1553 const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1554 : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1555 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1556 sendShared.push_back(firstDOF+j);
1557 recvShared.push_back(numDOF+numShared);
1558 if (j>0) {
1559 if (i>0)
1560 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1561 colIndices[firstDOF+j-1].push_back(numShared);
1562 if (i<nDOF1-1)
1563 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1564 }
1565 if (i>0)
1566 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1567 colIndices[firstDOF+j].push_back(numShared);
1568 if (i<nDOF1-1)
1569 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1570 if (j<nDOF0-1) {
1571 if (i>0)
1572 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1573 colIndices[firstDOF+j+1].push_back(numShared);
1574 if (i<nDOF1-1)
1575 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1576 }
1577 m_dofMap[firstNode+j]=numDOF+numShared;
1578 }
1579 }
1580 } else if (i0==0 && i2==0) {
1581 // sharing top or bottom plane
1582 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1583 for (dim_t i=0; i<nDOF2; i++) {
1584 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1585 : nDOF0*((i+1)*nDOF1-1));
1586 const int firstNode=(i1==-1 ?
1587 left+(i+front)*m_N0*m_N1
1588 : left+m_N0*((i+1+front)*m_N1-1));
1589 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1590 sendShared.push_back(firstDOF+j);
1591 recvShared.push_back(numDOF+numShared);
1592 if (j>0) {
1593 if (i>0)
1594 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1595 colIndices[firstDOF+j-1].push_back(numShared);
1596 if (i<nDOF2-1)
1597 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1598 }
1599 if (i>0)
1600 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1601 colIndices[firstDOF+j].push_back(numShared);
1602 if (i<nDOF2-1)
1603 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1604 if (j<nDOF0-1) {
1605 if (i>0)
1606 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1607 colIndices[firstDOF+j+1].push_back(numShared);
1608 if (i<nDOF2-1)
1609 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1610 }
1611 m_dofMap[firstNode+j]=numDOF+numShared;
1612 }
1613 }
1614 } else if (i1==0 && i2==0) {
1615 // sharing left or right plane
1616 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1617 for (dim_t i=0; i<nDOF2; i++) {
1618 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1619 : nDOF0*(1+i*nDOF1)-1);
1620 const int firstNode=(i0==-1 ?
1621 (bottom+(i+front)*m_N1)*m_N0
1622 : (bottom+1+(i+front)*m_N1)*m_N0-1);
1623 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1624 sendShared.push_back(firstDOF+j*nDOF0);
1625 recvShared.push_back(numDOF+numShared);
1626 if (j>0) {
1627 if (i>0)
1628 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1629 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1630 if (i<nDOF2-1)
1631 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1632 }
1633 if (i>0)
1634 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1635 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1636 if (i<nDOF2-1)
1637 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1638 if (j<nDOF1-1) {
1639 if (i>0)
1640 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1641 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1642 if (i<nDOF2-1)
1643 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1644 }
1645 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1646 }
1647 }
1648 } else if (i0==0) {
1649 // sharing an edge in x direction
1650 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1651 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1652 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1653 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1654 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1655 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1656 sendShared.push_back(firstDOF+i);
1657 recvShared.push_back(numDOF+numShared);
1658 if (i>0)
1659 colIndices[firstDOF+i-1].push_back(numShared);
1660 colIndices[firstDOF+i].push_back(numShared);
1661 if (i<nDOF0-1)
1662 colIndices[firstDOF+i+1].push_back(numShared);
1663 m_dofMap[firstNode+i]=numDOF+numShared;
1664 }
1665 } else if (i1==0) {
1666 // sharing an edge in y direction
1667 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1668 const int firstDOF=(i0+1)/2*(nDOF0-1)
1669 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1670 const int firstNode=(i0+1)/2*(m_N0-1)
1671 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1672 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1673 sendShared.push_back(firstDOF+i*nDOF0);
1674 recvShared.push_back(numDOF+numShared);
1675 if (i>0)
1676 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1677 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1678 if (i<nDOF1-1)
1679 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1680 m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1681 }
1682 } else if (i2==0) {
1683 // sharing an edge in z direction
1684 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1685 const int firstDOF=(i0+1)/2*(nDOF0-1)
1686 +(i1+1)/2*nDOF0*(nDOF1-1);
1687 const int firstNode=(i0+1)/2*(m_N0-1)
1688 +(i1+1)/2*m_N0*(m_N1-1);
1689 for (dim_t i=0; i<nDOF2; i++, numShared++) {
1690 sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
1691 recvShared.push_back(numDOF+numShared);
1692 if (i>0)
1693 colIndices[firstDOF+(i-1)*nDOF0*nDOF1].push_back(numShared);
1694 colIndices[firstDOF+i*nDOF0*nDOF1].push_back(numShared);
1695 if (i<nDOF2-1)
1696 colIndices[firstDOF+(i+1)*nDOF0*nDOF1].push_back(numShared);
1697 m_dofMap[firstNode+i*m_N0*m_N1]=numDOF+numShared;
1698 }
1699 } else {
1700 // sharing a node
1701 const int dof=(i0+1)/2*(nDOF0-1)
1702 +(i1+1)/2*nDOF0*(nDOF1-1)
1703 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1704 const int node=(i0+1)/2*(m_N0-1)
1705 +(i1+1)/2*m_N0*(m_N1-1)
1706 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1707 offsetInShared.push_back(offsetInShared.back()+1);
1708 sendShared.push_back(dof);
1709 recvShared.push_back(numDOF+numShared);
1710 colIndices[dof].push_back(numShared);
1711 m_dofMap[node]=numDOF+numShared;
1712 ++numShared;
1713 }
1714 }
1715 }
1716 }
1717 }
1718
1719 // create connector
1720 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1721 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1722 &offsetInShared[0], 1, 0, m_mpiInfo);
1723 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1724 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1725 &offsetInShared[0], 1, 0, m_mpiInfo);
1726 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1727 Paso_SharedComponents_free(snd_shcomp);
1728 Paso_SharedComponents_free(rcv_shcomp);
1729
1730 // create main and couple blocks
1731 Paso_Pattern *mainPattern = createMainPattern();
1732 Paso_Pattern *colPattern, *rowPattern;
1733 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1734
1735 // allocate paso distribution
1736 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1737 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1738
1739 // finally create the system matrix
1740 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1741 distribution, distribution, mainPattern, colPattern, rowPattern,
1742 m_connector, m_connector);
1743
1744 Paso_Distribution_free(distribution);
1745
1746 // useful debug output
1747 /*
1748 cout << "--- rcv_shcomp ---" << endl;
1749 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1750 for (size_t i=0; i<neighbour.size(); i++) {
1751 cout << "neighbor[" << i << "]=" << neighbour[i]
1752 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1753 }
1754 for (size_t i=0; i<recvShared.size(); i++) {
1755 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1756 }
1757 cout << "--- snd_shcomp ---" << endl;
1758 for (size_t i=0; i<sendShared.size(); i++) {
1759 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1760 }
1761 cout << "--- dofMap ---" << endl;
1762 for (size_t i=0; i<m_dofMap.size(); i++) {
1763 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1764 }
1765 cout << "--- colIndices ---" << endl;
1766 for (size_t i=0; i<colIndices.size(); i++) {
1767 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1768 }
1769 */
1770
1771 /*
1772 cout << "--- main_pattern ---" << endl;
1773 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1774 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1775 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1776 }
1777 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1778 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1779 }
1780 */
1781
1782 /*
1783 cout << "--- colCouple_pattern ---" << endl;
1784 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1785 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1786 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1787 }
1788 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1789 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1790 }
1791 */
1792
1793 /*
1794 cout << "--- rowCouple_pattern ---" << endl;
1795 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1796 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1797 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1798 }
1799 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1800 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1801 }
1802 */
1803
1804 Paso_Pattern_free(mainPattern);
1805 Paso_Pattern_free(colPattern);
1806 Paso_Pattern_free(rowPattern);
1807 }
1808
1809 //protected
1810 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1811 bool reduced) const
1812 {
1813 const dim_t numComp = in.getDataPointSize();
1814 if (reduced) {
1815 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1816 const double c0 = .125;
1817 #pragma omp parallel for
1818 for (index_t k2=0; k2 < m_NE2; ++k2) {
1819 for (index_t k1=0; k1 < m_NE1; ++k1) {
1820 for (index_t k0=0; k0 < m_NE0; ++k0) {
1821 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1822 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1823 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1824 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1825 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1826 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1827 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1828 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1829 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1830 for (index_t i=0; i < numComp; ++i) {
1831 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1832 } /* end of component loop i */
1833 } /* end of k0 loop */
1834 } /* end of k1 loop */
1835 } /* end of k2 loop */
1836 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1837 } else {
1838 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1839 const double c0 = .0094373878376559314545;
1840 const double c1 = .035220810900864519624;
1841 const double c2 = .13144585576580214704;
1842 const double c3 = .49056261216234406855;
1843 #pragma omp parallel for
1844 for (index_t k2=0; k2 < m_NE2; ++k2) {
1845 for (index_t k1=0; k1 < m_NE1; ++k1) {
1846 for (index_t k0=0; k0 < m_NE0; ++k0) {
1847 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1848 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1849 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1850 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1851 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1852 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1853 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1854 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1855 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1856 for (index_t i=0; i < numComp; ++i) {
1857 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1858 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1859 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1860 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1861 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1862 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1863 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1864 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1865 } /* end of component loop i */
1866 } /* end of k0 loop */
1867 } /* end of k1 loop */
1868 } /* end of k2 loop */
1869 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1870 }
1871 }
1872
1873 //protected
1874 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1875 bool reduced) const
1876 {
1877 const dim_t numComp = in.getDataPointSize();
1878 if (reduced) {
1879 const double c0 = .25;
1880 #pragma omp parallel
1881 {
1882 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1883 if (m_faceOffset[0] > -1) {
1884 #pragma omp for nowait
1885 for (index_t k2=0; k2 < m_NE2; ++k2) {
1886 for (index_t k1=0; k1 < m_NE1; ++k1) {
1887 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1888 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1889 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1890 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1891 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1892 for (index_t i=0; i < numComp; ++i) {
1893 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1894 } /* end of component loop i */
1895 } /* end of k1 loop */
1896 } /* end of k2 loop */
1897 } /* end of face 0 */
1898 if (m_faceOffset[1] > -1) {
1899 #pragma omp for nowait
1900 for (index_t k2=0; k2 < m_NE2; ++k2) {
1901 for (index_t k1=0; k1 < m_NE1; ++k1) {
1902 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,