/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3760 - (show annotations)
Mon Jan 9 05:21:18 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 383491 byte(s)
-implemented addPDEToRHS() and setToSize()
-added a few missing calls to requireWrite()
-added assemblePDESystem() to Rectangle but haven't tested yet

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 createPattern();
85 }
86
87
88 Brick::~Brick()
89 {
90 }
91
92 string Brick::getDescription() const
93 {
94 return "ripley::Brick";
95 }
96
97 bool Brick::operator==(const AbstractDomain& other) const
98 {
99 const Brick* o=dynamic_cast<const Brick*>(&other);
100 if (o) {
101 return (RipleyDomain::operator==(other) &&
102 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
103 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
104 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
105 }
106
107 return false;
108 }
109
110 void Brick::dump(const string& fileName) const
111 {
112 #if USE_SILO
113 string fn(fileName);
114 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
115 fn+=".silo";
116 }
117
118 const int NUM_SILO_FILES = 1;
119 const char* blockDirFmt = "/block%04d";
120 int driver=DB_HDF5;
121 string siloPath;
122 DBfile* dbfile = NULL;
123
124 #ifdef ESYS_MPI
125 PMPIO_baton_t* baton = NULL;
126 #endif
127
128 if (m_mpiInfo->size > 1) {
129 #ifdef ESYS_MPI
130 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
131 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
132 PMPIO_DefaultClose, (void*)&driver);
133 // try the fallback driver in case of error
134 if (!baton && driver != DB_PDB) {
135 driver = DB_PDB;
136 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
137 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
138 PMPIO_DefaultClose, (void*)&driver);
139 }
140 if (baton) {
141 char str[64];
142 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
143 siloPath = str;
144 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
145 }
146 #endif
147 } else {
148 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
149 getDescription().c_str(), driver);
150 // try the fallback driver in case of error
151 if (!dbfile && driver != DB_PDB) {
152 driver = DB_PDB;
153 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
154 getDescription().c_str(), driver);
155 }
156 }
157
158 if (!dbfile)
159 throw RipleyException("dump: Could not create Silo file");
160
161 /*
162 if (driver==DB_HDF5) {
163 // gzip level 1 already provides good compression with minimal
164 // performance penalty. Some tests showed that gzip levels >3 performed
165 // rather badly on escript data both in terms of time and space
166 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
167 }
168 */
169
170 boost::scoped_ptr<double> x(new double[m_N0]);
171 boost::scoped_ptr<double> y(new double[m_N1]);
172 boost::scoped_ptr<double> z(new double[m_N2]);
173 double* coords[3] = { x.get(), y.get(), z.get() };
174 pair<double,double> xdx = getFirstCoordAndSpacing(0);
175 pair<double,double> ydy = getFirstCoordAndSpacing(1);
176 pair<double,double> zdz = getFirstCoordAndSpacing(2);
177 #pragma omp parallel
178 {
179 #pragma omp for
180 for (dim_t i0 = 0; i0 < m_N0; i0++) {
181 coords[0][i0]=xdx.first+i0*xdx.second;
182 }
183 #pragma omp for
184 for (dim_t i1 = 0; i1 < m_N1; i1++) {
185 coords[1][i1]=ydy.first+i1*ydy.second;
186 }
187 #pragma omp for
188 for (dim_t i2 = 0; i2 < m_N2; i2++) {
189 coords[2][i2]=zdz.first+i2*zdz.second;
190 }
191 }
192 IndexVector dims = getNumNodesPerDim();
193 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
194 DB_COLLINEAR, NULL);
195
196 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
197 NULL, 0, DB_INT, DB_NODECENT, NULL);
198
199 // write element ids
200 dims = getNumElementsPerDim();
201 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
202 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
203
204 // rank 0 writes multimesh and multivar
205 if (m_mpiInfo->rank == 0) {
206 vector<string> tempstrings;
207 vector<char*> names;
208 for (dim_t i=0; i<m_mpiInfo->size; i++) {
209 stringstream path;
210 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
211 tempstrings.push_back(path.str());
212 names.push_back((char*)tempstrings.back().c_str());
213 }
214 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
215 DBSetDir(dbfile, "/");
216 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
217 &types[0], NULL);
218 tempstrings.clear();
219 names.clear();
220 for (dim_t i=0; i<m_mpiInfo->size; i++) {
221 stringstream path;
222 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
223 tempstrings.push_back(path.str());
224 names.push_back((char*)tempstrings.back().c_str());
225 }
226 types.assign(m_mpiInfo->size, DB_QUADVAR);
227 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
228 &types[0], NULL);
229 tempstrings.clear();
230 names.clear();
231 for (dim_t i=0; i<m_mpiInfo->size; i++) {
232 stringstream path;
233 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
234 tempstrings.push_back(path.str());
235 names.push_back((char*)tempstrings.back().c_str());
236 }
237 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
238 &types[0], NULL);
239 }
240
241 if (m_mpiInfo->size > 1) {
242 #ifdef ESYS_MPI
243 PMPIO_HandOffBaton(baton, dbfile);
244 PMPIO_Finish(baton);
245 #endif
246 } else {
247 DBClose(dbfile);
248 }
249
250 #else // USE_SILO
251 throw RipleyException("dump(): no Silo support");
252 #endif
253 }
254
255 const int* Brick::borrowSampleReferenceIDs(int fsType) const
256 {
257 switch (fsType) {
258 case Nodes:
259 case ReducedNodes: //FIXME: reduced
260 return &m_nodeId[0];
261 case DegreesOfFreedom:
262 case ReducedDegreesOfFreedom: //FIXME: reduced
263 return &m_dofId[0];
264 case Elements:
265 case ReducedElements:
266 return &m_elementId[0];
267 case FaceElements:
268 case ReducedFaceElements:
269 return &m_faceId[0];
270 default:
271 break;
272 }
273
274 stringstream msg;
275 msg << "borrowSampleReferenceIDs() not implemented for function space type "
276 << fsType;
277 throw RipleyException(msg.str());
278 }
279
280 bool Brick::ownSample(int fsType, index_t id) const
281 {
282 if (getMPISize()==1)
283 return true;
284
285 switch (fsType) {
286 case Nodes:
287 case ReducedNodes: //FIXME: reduced
288 return (m_dofMap[id] < getNumDOF());
289 case DegreesOfFreedom:
290 case ReducedDegreesOfFreedom:
291 return true;
292 case Elements:
293 case ReducedElements:
294 {
295 // check ownership of element's _last_ node
296 const index_t x=id%m_NE0 + 1;
297 const index_t y=id%(m_NE0*m_NE1)/m_NE0 + 1;
298 const index_t z=id/(m_NE0*m_NE1) + 1;
299 return (m_dofMap[x + m_N0*y +m_N0*m_N1*z] < getNumDOF());
300 }
301 case FaceElements:
302 case ReducedFaceElements:
303 {
304 // check ownership of face element's last node
305 const IndexVector faces = getNumFacesPerBoundary();
306 dim_t n=0;
307 for (size_t i=0; i<faces.size(); i++) {
308 n+=faces[i];
309 if (id<n) {
310 const index_t j=id-n+faces[i];
311 if (i>=4) { // front or back
312 const index_t first=(i==4 ? 0 : m_N0*m_N1*(m_N2-1));
313 return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0] < getNumDOF());
314 } else if (i>=2) { // bottom or top
315 const index_t first=(i==2 ? 0 : m_N0*(m_N1-1));
316 return (m_dofMap[first+j%m_NE0+1+(j/m_NE0+1)*m_N0*m_N1] < getNumDOF());
317 } else { // left or right
318 const index_t first=(i==0 ? 0 : m_N0-1);
319 return (m_dofMap[first+(j%m_NE1+1)*m_N0+(j/m_NE1+1)*m_N0*m_N1] < getNumDOF());
320 }
321 }
322 }
323 return false;
324 }
325 default:
326 break;
327 }
328
329 stringstream msg;
330 msg << "ownSample() not implemented for "
331 << functionSpaceTypeAsString(fsType);
332 throw RipleyException(msg.str());
333 }
334
335 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
336 {
337 escript::Data& in = *const_cast<escript::Data*>(&cIn);
338 const dim_t numComp = in.getDataPointSize();
339 const double h0 = m_l0/m_gNE0;
340 const double h1 = m_l1/m_gNE1;
341 const double h2 = m_l1/m_gNE2;
342 const double C0 = .044658198738520451079;
343 const double C1 = .16666666666666666667;
344 const double C2 = .21132486540518711775;
345 const double C3 = .25;
346 const double C4 = .5;
347 const double C5 = .62200846792814621559;
348 const double C6 = .78867513459481288225;
349
350 if (out.getFunctionSpace().getTypeCode() == Elements) {
351 out.requireWrite();
352 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
353 #pragma omp parallel for
354 for (index_t k2=0; k2 < m_NE2; ++k2) {
355 for (index_t k1=0; k1 < m_NE1; ++k1) {
356 for (index_t k0=0; k0 < m_NE0; ++k0) {
357 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
358 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
359 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
360 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
361 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
362 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
363 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
364 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
365 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
366 for (index_t i=0; i < numComp; ++i) {
367 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
368 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
369 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
370 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
371 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
372 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
373 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
374 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
375 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
376 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
377 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
378 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
379 o[INDEX3(i,0,0,numComp,3)] = V0;
380 o[INDEX3(i,1,0,numComp,3)] = V4;
381 o[INDEX3(i,2,0,numComp,3)] = V8;
382 o[INDEX3(i,0,1,numComp,3)] = V0;
383 o[INDEX3(i,1,1,numComp,3)] = V5;
384 o[INDEX3(i,2,1,numComp,3)] = V9;
385 o[INDEX3(i,0,2,numComp,3)] = V1;
386 o[INDEX3(i,1,2,numComp,3)] = V4;
387 o[INDEX3(i,2,2,numComp,3)] = V10;
388 o[INDEX3(i,0,3,numComp,3)] = V1;
389 o[INDEX3(i,1,3,numComp,3)] = V5;
390 o[INDEX3(i,2,3,numComp,3)] = V11;
391 o[INDEX3(i,0,4,numComp,3)] = V2;
392 o[INDEX3(i,1,4,numComp,3)] = V6;
393 o[INDEX3(i,2,4,numComp,3)] = V8;
394 o[INDEX3(i,0,5,numComp,3)] = V2;
395 o[INDEX3(i,1,5,numComp,3)] = V7;
396 o[INDEX3(i,2,5,numComp,3)] = V9;
397 o[INDEX3(i,0,6,numComp,3)] = V3;
398 o[INDEX3(i,1,6,numComp,3)] = V6;
399 o[INDEX3(i,2,6,numComp,3)] = V10;
400 o[INDEX3(i,0,7,numComp,3)] = V3;
401 o[INDEX3(i,1,7,numComp,3)] = V7;
402 o[INDEX3(i,2,7,numComp,3)] = V11;
403 } /* end of component loop i */
404 } /* end of k0 loop */
405 } /* end of k1 loop */
406 } /* end of k2 loop */
407 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
408 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
409 out.requireWrite();
410 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
411 #pragma omp parallel for
412 for (index_t k2=0; k2 < m_NE2; ++k2) {
413 for (index_t k1=0; k1 < m_NE1; ++k1) {
414 for (index_t k0=0; k0 < m_NE0; ++k0) {
415 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
416 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
417 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
418 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
419 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
420 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
421 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
422 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
423 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
424 for (index_t i=0; i < numComp; ++i) {
425 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
426 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
427 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
428 } /* end of component loop i */
429 } /* end of k0 loop */
430 } /* end of k1 loop */
431 } /* end of k2 loop */
432 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
433 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
434 out.requireWrite();
435 /*** GENERATOR SNIP_GRAD_FACES TOP */
436 #pragma omp parallel
437 {
438 if (m_faceOffset[0] > -1) {
439 #pragma omp for nowait
440 for (index_t k2=0; k2 < m_NE2; ++k2) {
441 for (index_t k1=0; k1 < m_NE1; ++k1) {
442 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
443 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
444 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
445 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
446 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
447 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
448 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
449 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
450 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
451 for (index_t i=0; i < numComp; ++i) {
452 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
453 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
454 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
455 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
456 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457 o[INDEX3(i,1,0,numComp,3)] = V0;
458 o[INDEX3(i,2,0,numComp,3)] = V2;
459 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460 o[INDEX3(i,1,1,numComp,3)] = V0;
461 o[INDEX3(i,2,1,numComp,3)] = V3;
462 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463 o[INDEX3(i,1,2,numComp,3)] = V1;
464 o[INDEX3(i,2,2,numComp,3)] = V2;
465 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466 o[INDEX3(i,1,3,numComp,3)] = V1;
467 o[INDEX3(i,2,3,numComp,3)] = V3;
468 } /* end of component loop i */
469 } /* end of k1 loop */
470 } /* end of k2 loop */
471 } /* end of face 0 */
472 if (m_faceOffset[1] > -1) {
473 #pragma omp for nowait
474 for (index_t k2=0; k2 < m_NE2; ++k2) {
475 for (index_t k1=0; k1 < m_NE1; ++k1) {
476 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
477 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
478 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
479 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
480 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
481 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
482 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
483 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
484 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
485 for (index_t i=0; i < numComp; ++i) {
486 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
487 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
488 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
489 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
490 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
491 o[INDEX3(i,1,0,numComp,3)] = V0;
492 o[INDEX3(i,2,0,numComp,3)] = V2;
493 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
494 o[INDEX3(i,1,1,numComp,3)] = V0;
495 o[INDEX3(i,2,1,numComp,3)] = V3;
496 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
497 o[INDEX3(i,1,2,numComp,3)] = V1;
498 o[INDEX3(i,2,2,numComp,3)] = V2;
499 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
500 o[INDEX3(i,1,3,numComp,3)] = V1;
501 o[INDEX3(i,2,3,numComp,3)] = V3;
502 } /* end of component loop i */
503 } /* end of k1 loop */
504 } /* end of k2 loop */
505 } /* end of face 1 */
506 if (m_faceOffset[2] > -1) {
507 #pragma omp for nowait
508 for (index_t k2=0; k2 < m_NE2; ++k2) {
509 for (index_t k0=0; k0 < m_NE0; ++k0) {
510 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
511 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
512 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
513 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
514 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
515 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
516 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
517 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
518 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
519 for (index_t i=0; i < numComp; ++i) {
520 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
521 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
522 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
523 o[INDEX3(i,0,0,numComp,3)] = V0;
524 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525 o[INDEX3(i,2,0,numComp,3)] = V1;
526 o[INDEX3(i,0,1,numComp,3)] = V0;
527 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528 o[INDEX3(i,2,1,numComp,3)] = V2;
529 o[INDEX3(i,0,2,numComp,3)] = V0;
530 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531 o[INDEX3(i,2,2,numComp,3)] = V1;
532 o[INDEX3(i,0,3,numComp,3)] = V0;
533 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534 o[INDEX3(i,2,3,numComp,3)] = V2;
535 } /* end of component loop i */
536 } /* end of k0 loop */
537 } /* end of k2 loop */
538 } /* end of face 2 */
539 if (m_faceOffset[3] > -1) {
540 #pragma omp for nowait
541 for (index_t k2=0; k2 < m_NE2; ++k2) {
542 for (index_t k0=0; k0 < m_NE0; ++k0) {
543 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
544 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
545 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
546 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
547 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
548 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
549 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
550 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
551 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
552 for (index_t i=0; i < numComp; ++i) {
553 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
554 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
555 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
556 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
557 o[INDEX3(i,0,0,numComp,3)] = V0;
558 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
559 o[INDEX3(i,2,0,numComp,3)] = V2;
560 o[INDEX3(i,0,1,numComp,3)] = V0;
561 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
562 o[INDEX3(i,2,1,numComp,3)] = V3;
563 o[INDEX3(i,0,2,numComp,3)] = V1;
564 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
565 o[INDEX3(i,2,2,numComp,3)] = V2;
566 o[INDEX3(i,0,3,numComp,3)] = V1;
567 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
568 o[INDEX3(i,2,3,numComp,3)] = V3;
569 } /* end of component loop i */
570 } /* end of k0 loop */
571 } /* end of k2 loop */
572 } /* end of face 3 */
573 if (m_faceOffset[4] > -1) {
574 #pragma omp for nowait
575 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 for (index_t k0=0; k0 < m_NE0; ++k0) {
577 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
578 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
579 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
580 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
581 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
582 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
583 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
584 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
585 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
586 for (index_t i=0; i < numComp; ++i) {
587 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
588 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
589 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
590 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
591 o[INDEX3(i,0,0,numComp,3)] = V0;
592 o[INDEX3(i,1,0,numComp,3)] = V2;
593 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 o[INDEX3(i,0,1,numComp,3)] = V0;
595 o[INDEX3(i,1,1,numComp,3)] = V3;
596 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597 o[INDEX3(i,0,2,numComp,3)] = V1;
598 o[INDEX3(i,1,2,numComp,3)] = V2;
599 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600 o[INDEX3(i,0,3,numComp,3)] = V1;
601 o[INDEX3(i,1,3,numComp,3)] = V3;
602 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 } /* end of component loop i */
604 } /* end of k0 loop */
605 } /* end of k1 loop */
606 } /* end of face 4 */
607 if (m_faceOffset[5] > -1) {
608 #pragma omp for nowait
609 for (index_t k1=0; k1 < m_NE1; ++k1) {
610 for (index_t k0=0; k0 < m_NE0; ++k0) {
611 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
612 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
613 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
614 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
615 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
616 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
617 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
618 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
619 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
620 for (index_t i=0; i < numComp; ++i) {
621 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
622 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
623 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
624 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
625 o[INDEX3(i,0,0,numComp,3)] = V0;
626 o[INDEX3(i,1,0,numComp,3)] = V2;
627 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
628 o[INDEX3(i,0,1,numComp,3)] = V0;
629 o[INDEX3(i,1,1,numComp,3)] = V3;
630 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
631 o[INDEX3(i,0,2,numComp,3)] = V1;
632 o[INDEX3(i,1,2,numComp,3)] = V2;
633 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
634 o[INDEX3(i,0,3,numComp,3)] = V1;
635 o[INDEX3(i,1,3,numComp,3)] = V3;
636 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
637 } /* end of component loop i */
638 } /* end of k0 loop */
639 } /* end of k1 loop */
640 } /* end of face 5 */
641 } // end of parallel section
642 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
643 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
644 out.requireWrite();
645 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
646 #pragma omp parallel
647 {
648 if (m_faceOffset[0] > -1) {
649 #pragma omp for nowait
650 for (index_t k2=0; k2 < m_NE2; ++k2) {
651 for (index_t k1=0; k1 < m_NE1; ++k1) {
652 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
653 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
654 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
655 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
656 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
657 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
658 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
659 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
660 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
661 for (index_t i=0; i < numComp; ++i) {
662 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
663 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
664 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
665 } /* end of component loop i */
666 } /* end of k1 loop */
667 } /* end of k2 loop */
668 } /* end of face 0 */
669 if (m_faceOffset[1] > -1) {
670 #pragma omp for nowait
671 for (index_t k2=0; k2 < m_NE2; ++k2) {
672 for (index_t k1=0; k1 < m_NE1; ++k1) {
673 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
674 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
675 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
676 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
677 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
678 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
679 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
680 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
681 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
682 for (index_t i=0; i < numComp; ++i) {
683 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
684 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
685 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
686 } /* end of component loop i */
687 } /* end of k1 loop */
688 } /* end of k2 loop */
689 } /* end of face 1 */
690 if (m_faceOffset[2] > -1) {
691 #pragma omp for nowait
692 for (index_t k2=0; k2 < m_NE2; ++k2) {
693 for (index_t k0=0; k0 < m_NE0; ++k0) {
694 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
695 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
696 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
697 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
698 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
699 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
700 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
701 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
702 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
703 for (index_t i=0; i < numComp; ++i) {
704 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
705 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
706 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
707 } /* end of component loop i */
708 } /* end of k0 loop */
709 } /* end of k2 loop */
710 } /* end of face 2 */
711 if (m_faceOffset[3] > -1) {
712 #pragma omp for nowait
713 for (index_t k2=0; k2 < m_NE2; ++k2) {
714 for (index_t k0=0; k0 < m_NE0; ++k0) {
715 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
716 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
717 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
718 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
719 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
720 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
721 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
722 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
723 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
724 for (index_t i=0; i < numComp; ++i) {
725 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
726 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
727 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
728 } /* end of component loop i */
729 } /* end of k0 loop */
730 } /* end of k2 loop */
731 } /* end of face 3 */
732 if (m_faceOffset[4] > -1) {
733 #pragma omp for nowait
734 for (index_t k1=0; k1 < m_NE1; ++k1) {
735 for (index_t k0=0; k0 < m_NE0; ++k0) {
736 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
737 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
738 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
739 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
740 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
741 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
742 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
743 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
744 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
745 for (index_t i=0; i < numComp; ++i) {
746 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
747 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
748 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
749 } /* end of component loop i */
750 } /* end of k0 loop */
751 } /* end of k1 loop */
752 } /* end of face 4 */
753 if (m_faceOffset[5] > -1) {
754 #pragma omp for nowait
755 for (index_t k1=0; k1 < m_NE1; ++k1) {
756 for (index_t k0=0; k0 < m_NE0; ++k0) {
757 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
758 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
759 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
760 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
761 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
762 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
763 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
764 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
765 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
766 for (index_t i=0; i < numComp; ++i) {
767 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
768 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
769 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
770 } /* end of component loop i */
771 } /* end of k0 loop */
772 } /* end of k1 loop */
773 } /* end of face 5 */
774 } // end of parallel section
775 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
776 } else {
777 stringstream msg;
778 msg << "setToGradient() not implemented for "
779 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
780 throw RipleyException(msg.str());
781 }
782 }
783
784 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
785 {
786 escript::Data& in = *const_cast<escript::Data*>(&arg);
787 const dim_t numComp = in.getDataPointSize();
788 const double h0 = m_l0/m_gNE0;
789 const double h1 = m_l1/m_gNE1;
790 const double h2 = m_l2/m_gNE2;
791 if (arg.getFunctionSpace().getTypeCode() == Elements) {
792 const double w_0 = h0*h1*h2/8.;
793 #pragma omp parallel
794 {
795 vector<double> int_local(numComp, 0);
796 #pragma omp for nowait
797 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
798 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
799 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
800 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
801 for (index_t i=0; i < numComp; ++i) {
802 const register double f_0 = f[INDEX2(i,0,numComp)];
803 const register double f_1 = f[INDEX2(i,1,numComp)];
804 const register double f_2 = f[INDEX2(i,2,numComp)];
805 const register double f_3 = f[INDEX2(i,3,numComp)];
806 const register double f_4 = f[INDEX2(i,4,numComp)];
807 const register double f_5 = f[INDEX2(i,5,numComp)];
808 const register double f_6 = f[INDEX2(i,6,numComp)];
809 const register double f_7 = f[INDEX2(i,7,numComp)];
810 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
811 } /* end of component loop i */
812 } /* end of k0 loop */
813 } /* end of k1 loop */
814 } /* end of k2 loop */
815
816 #pragma omp critical
817 for (index_t i=0; i<numComp; i++)
818 integrals[i]+=int_local[i];
819 } // end of parallel section
820 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
821 const double w_0 = h0*h1*h2;
822 #pragma omp parallel
823 {
824 vector<double> int_local(numComp, 0);
825 #pragma omp for nowait
826 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
827 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
828 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
829 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
830 for (index_t i=0; i < numComp; ++i) {
831 int_local[i]+=f[i]*w_0;
832 } /* end of component loop i */
833 } /* end of k0 loop */
834 } /* end of k1 loop */
835 } /* end of k2 loop */
836
837 #pragma omp critical
838 for (index_t i=0; i<numComp; i++)
839 integrals[i]+=int_local[i];
840 } // end of parallel section
841 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
842 const double w_0 = h1*h2/4.;
843 const double w_1 = h0*h2/4.;
844 const double w_2 = h0*h1/4.;
845 #pragma omp parallel
846 {
847 vector<double> int_local(numComp, 0);
848 if (m_faceOffset[0] > -1) {
849 #pragma omp for nowait
850 for (index_t k2=0; k2 < m_NE2; ++k2) {
851 for (index_t k1=0; k1 < m_NE1; ++k1) {
852 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
853 for (index_t i=0; i < numComp; ++i) {
854 const register double f_0 = f[INDEX2(i,0,numComp)];
855 const register double f_1 = f[INDEX2(i,1,numComp)];
856 const register double f_2 = f[INDEX2(i,2,numComp)];
857 const register double f_3 = f[INDEX2(i,3,numComp)];
858 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
859 } /* end of component loop i */
860 } /* end of k1 loop */
861 } /* end of k2 loop */
862 }
863
864 if (m_faceOffset[1] > -1) {
865 #pragma omp for nowait
866 for (index_t k2=0; k2 < m_NE2; ++k2) {
867 for (index_t k1=0; k1 < m_NE1; ++k1) {
868 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
869 for (index_t i=0; i < numComp; ++i) {
870 const register double f_0 = f[INDEX2(i,0,numComp)];
871 const register double f_1 = f[INDEX2(i,1,numComp)];
872 const register double f_2 = f[INDEX2(i,2,numComp)];
873 const register double f_3 = f[INDEX2(i,3,numComp)];
874 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
875 } /* end of component loop i */
876 } /* end of k1 loop */
877 } /* end of k2 loop */
878 }
879
880 if (m_faceOffset[2] > -1) {
881 #pragma omp for nowait
882 for (index_t k2=0; k2 < m_NE2; ++k2) {
883 for (index_t k0=0; k0 < m_NE0; ++k0) {
884 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
885 for (index_t i=0; i < numComp; ++i) {
886 const register double f_0 = f[INDEX2(i,0,numComp)];
887 const register double f_1 = f[INDEX2(i,1,numComp)];
888 const register double f_2 = f[INDEX2(i,2,numComp)];
889 const register double f_3 = f[INDEX2(i,3,numComp)];
890 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
891 } /* end of component loop i */
892 } /* end of k1 loop */
893 } /* end of k2 loop */
894 }
895
896 if (m_faceOffset[3] > -1) {
897 #pragma omp for nowait
898 for (index_t k2=0; k2 < m_NE2; ++k2) {
899 for (index_t k0=0; k0 < m_NE0; ++k0) {
900 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
901 for (index_t i=0; i < numComp; ++i) {
902 const register double f_0 = f[INDEX2(i,0,numComp)];
903 const register double f_1 = f[INDEX2(i,1,numComp)];
904 const register double f_2 = f[INDEX2(i,2,numComp)];
905 const register double f_3 = f[INDEX2(i,3,numComp)];
906 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
907 } /* end of component loop i */
908 } /* end of k1 loop */
909 } /* end of k2 loop */
910 }
911
912 if (m_faceOffset[4] > -1) {
913 #pragma omp for nowait
914 for (index_t k1=0; k1 < m_NE1; ++k1) {
915 for (index_t k0=0; k0 < m_NE0; ++k0) {
916 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
917 for (index_t i=0; i < numComp; ++i) {
918 const register double f_0 = f[INDEX2(i,0,numComp)];
919 const register double f_1 = f[INDEX2(i,1,numComp)];
920 const register double f_2 = f[INDEX2(i,2,numComp)];
921 const register double f_3 = f[INDEX2(i,3,numComp)];
922 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
923 } /* end of component loop i */
924 } /* end of k1 loop */
925 } /* end of k2 loop */
926 }
927
928 if (m_faceOffset[5] > -1) {
929 #pragma omp for nowait
930 for (index_t k1=0; k1 < m_NE1; ++k1) {
931 for (index_t k0=0; k0 < m_NE0; ++k0) {
932 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
933 for (index_t i=0; i < numComp; ++i) {
934 const register double f_0 = f[INDEX2(i,0,numComp)];
935 const register double f_1 = f[INDEX2(i,1,numComp)];
936 const register double f_2 = f[INDEX2(i,2,numComp)];
937 const register double f_3 = f[INDEX2(i,3,numComp)];
938 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
939 } /* end of component loop i */
940 } /* end of k1 loop */
941 } /* end of k2 loop */
942 }
943
944 #pragma omp critical
945 for (index_t i=0; i<numComp; i++)
946 integrals[i]+=int_local[i];
947 } // end of parallel section
948
949 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
950 const double w_0 = h1*h2;
951 const double w_1 = h0*h2;
952 const double w_2 = h0*h1;
953 #pragma omp parallel
954 {
955 vector<double> int_local(numComp, 0);
956 if (m_faceOffset[0] > -1) {
957 #pragma omp for nowait
958 for (index_t k2=0; k2 < m_NE2; ++k2) {
959 for (index_t k1=0; k1 < m_NE1; ++k1) {
960 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
961 for (index_t i=0; i < numComp; ++i) {
962 int_local[i]+=f[i]*w_0;
963 } /* end of component loop i */
964 } /* end of k1 loop */
965 } /* end of k2 loop */
966 }
967
968 if (m_faceOffset[1] > -1) {
969 #pragma omp for nowait
970 for (index_t k2=0; k2 < m_NE2; ++k2) {
971 for (index_t k1=0; k1 < m_NE1; ++k1) {
972 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
973 for (index_t i=0; i < numComp; ++i) {
974 int_local[i]+=f[i]*w_0;
975 } /* end of component loop i */
976 } /* end of k1 loop */
977 } /* end of k2 loop */
978 }
979
980 if (m_faceOffset[2] > -1) {
981 #pragma omp for nowait
982 for (index_t k2=0; k2 < m_NE2; ++k2) {
983 for (index_t k0=0; k0 < m_NE0; ++k0) {
984 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
985 for (index_t i=0; i < numComp; ++i) {
986 int_local[i]+=f[i]*w_1;
987 } /* end of component loop i */
988 } /* end of k1 loop */
989 } /* end of k2 loop */
990 }
991
992 if (m_faceOffset[3] > -1) {
993 #pragma omp for nowait
994 for (index_t k2=0; k2 < m_NE2; ++k2) {
995 for (index_t k0=0; k0 < m_NE0; ++k0) {
996 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
997 for (index_t i=0; i < numComp; ++i) {
998 int_local[i]+=f[i]*w_1;
999 } /* end of component loop i */
1000 } /* end of k1 loop */
1001 } /* end of k2 loop */
1002 }
1003
1004 if (m_faceOffset[4] > -1) {
1005 #pragma omp for nowait
1006 for (index_t k1=0; k1 < m_NE1; ++k1) {
1007 for (index_t k0=0; k0 < m_NE0; ++k0) {
1008 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1009 for (index_t i=0; i < numComp; ++i) {
1010 int_local[i]+=f[i]*w_2;
1011 } /* end of component loop i */
1012 } /* end of k1 loop */
1013 } /* end of k2 loop */
1014 }
1015
1016 if (m_faceOffset[5] > -1) {
1017 #pragma omp for nowait
1018 for (index_t k1=0; k1 < m_NE1; ++k1) {
1019 for (index_t k0=0; k0 < m_NE0; ++k0) {
1020 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1021 for (index_t i=0; i < numComp; ++i) {
1022 int_local[i]+=f[i]*w_2;
1023 } /* end of component loop i */
1024 } /* end of k1 loop */
1025 } /* end of k2 loop */
1026 }
1027
1028 #pragma omp critical
1029 for (index_t i=0; i<numComp; i++)
1030 integrals[i]+=int_local[i];
1031 } // end of parallel section
1032
1033 } else {
1034 stringstream msg;
1035 msg << "setToIntegrals() not implemented for "
1036 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1037 throw RipleyException(msg.str());
1038 }
1039 }
1040
1041 void Brick::setToNormal(escript::Data& out) const
1042 {
1043 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1044 out.requireWrite();
1045 #pragma omp parallel
1046 {
1047 if (m_faceOffset[0] > -1) {
1048 #pragma omp for nowait
1049 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1050 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1051 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1052 // set vector at four quadrature points
1053 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1054 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1055 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1056 *o++ = -1.; *o++ = 0.; *o = 0.;
1057 }
1058 }
1059 }
1060
1061 if (m_faceOffset[1] > -1) {
1062 #pragma omp for nowait
1063 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1064 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1065 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1066 // set vector at four quadrature points
1067 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1068 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1069 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1070 *o++ = 1.; *o++ = 0.; *o = 0.;
1071 }
1072 }
1073 }
1074
1075 if (m_faceOffset[2] > -1) {
1076 #pragma omp for nowait
1077 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1078 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1079 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1080 // set vector at four quadrature points
1081 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1082 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1083 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1084 *o++ = 0.; *o++ = -1.; *o = 0.;
1085 }
1086 }
1087 }
1088
1089 if (m_faceOffset[3] > -1) {
1090 #pragma omp for nowait
1091 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1092 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1093 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1094 // set vector at four quadrature points
1095 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1096 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1097 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1098 *o++ = 0.; *o++ = 1.; *o = 0.;
1099 }
1100 }
1101 }
1102
1103 if (m_faceOffset[4] > -1) {
1104 #pragma omp for nowait
1105 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1106 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1107 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1108 // set vector at four quadrature points
1109 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1110 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1111 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1112 *o++ = 0.; *o++ = 0.; *o = -1.;
1113 }
1114 }
1115 }
1116
1117 if (m_faceOffset[5] > -1) {
1118 #pragma omp for nowait
1119 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1120 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1121 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1122 // set vector at four quadrature points
1123 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1124 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1125 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1126 *o++ = 0.; *o++ = 0.; *o = 1.;
1127 }
1128 }
1129 }
1130 } // end of parallel section
1131 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1132 out.requireWrite();
1133 #pragma omp parallel
1134 {
1135 if (m_faceOffset[0] > -1) {
1136 #pragma omp for nowait
1137 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1138 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1139 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1140 *o++ = -1.;
1141 *o++ = 0.;
1142 *o = 0.;
1143 }
1144 }
1145 }
1146
1147 if (m_faceOffset[1] > -1) {
1148 #pragma omp for nowait
1149 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1150 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1151 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1152 *o++ = 1.;
1153 *o++ = 0.;
1154 *o = 0.;
1155 }
1156 }
1157 }
1158
1159 if (m_faceOffset[2] > -1) {
1160 #pragma omp for nowait
1161 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1162 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1163 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1164 *o++ = 0.;
1165 *o++ = -1.;
1166 *o = 0.;
1167 }
1168 }
1169 }
1170
1171 if (m_faceOffset[3] > -1) {
1172 #pragma omp for nowait
1173 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1174 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1175 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1176 *o++ = 0.;
1177 *o++ = 1.;
1178 *o = 0.;
1179 }
1180 }
1181 }
1182
1183 if (m_faceOffset[4] > -1) {
1184 #pragma omp for nowait
1185 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1186 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1187 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1188 *o++ = 0.;
1189 *o++ = 0.;
1190 *o = -1.;
1191 }
1192 }
1193 }
1194
1195 if (m_faceOffset[5] > -1) {
1196 #pragma omp for nowait
1197 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1198 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1199 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1200 *o++ = 0.;
1201 *o++ = 0.;
1202 *o = 1.;
1203 }
1204 }
1205 }
1206 } // end of parallel section
1207
1208 } else {
1209 stringstream msg;
1210 msg << "setToNormal() not implemented for "
1211 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1212 throw RipleyException(msg.str());
1213 }
1214 }
1215
1216 void Brick::setToSize(escript::Data& out) const
1217 {
1218 if (out.getFunctionSpace().getTypeCode() == Elements
1219 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1220 out.requireWrite();
1221 const dim_t numQuad=out.getNumDataPointsPerSample();
1222 const double xSize=getFirstCoordAndSpacing(0).second;
1223 const double ySize=getFirstCoordAndSpacing(1).second;
1224 const double zSize=getFirstCoordAndSpacing(2).second;
1225 const double size=min(min(xSize,ySize),zSize);
1226 #pragma omp parallel for
1227 for (index_t k = 0; k < getNumElements(); ++k) {
1228 double* o = out.getSampleDataRW(k);
1229 fill(o, o+numQuad, size);
1230 }
1231 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1232 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1233 out.requireWrite();
1234 const dim_t numQuad=out.getNumDataPointsPerSample();
1235 const double xSize=getFirstCoordAndSpacing(0).second;
1236 const double ySize=getFirstCoordAndSpacing(1).second;
1237 const double zSize=getFirstCoordAndSpacing(2).second;
1238 #pragma omp parallel
1239 {
1240 if (m_faceOffset[0] > -1) {
1241 const double size=min(ySize,zSize);
1242 #pragma omp for nowait
1243 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1244 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1245 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1246 fill(o, o+numQuad, size);
1247 }
1248 }
1249 }
1250
1251 if (m_faceOffset[1] > -1) {
1252 const double size=min(ySize,zSize);
1253 #pragma omp for nowait
1254 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1255 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1256 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1257 fill(o, o+numQuad, size);
1258 }
1259 }
1260 }
1261
1262 if (m_faceOffset[2] > -1) {
1263 const double size=min(xSize,zSize);
1264 #pragma omp for nowait
1265 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1266 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1267 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1268 fill(o, o+numQuad, size);
1269 }
1270 }
1271 }
1272
1273 if (m_faceOffset[3] > -1) {
1274 const double size=min(xSize,zSize);
1275 #pragma omp for nowait
1276 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1277 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1278 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1279 fill(o, o+numQuad, size);
1280 }
1281 }
1282 }
1283
1284 if (m_faceOffset[4] > -1) {
1285 const double size=min(xSize,ySize);
1286 #pragma omp for nowait
1287 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1288 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1289 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1290 fill(o, o+numQuad, size);
1291 }
1292 }
1293 }
1294
1295 if (m_faceOffset[5] > -1) {
1296 const double size=min(xSize,ySize);
1297 #pragma omp for nowait
1298 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1299 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1300 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1301 fill(o, o+numQuad, size);
1302 }
1303 }
1304 }
1305 } // end of parallel section
1306
1307 } else {
1308 stringstream msg;
1309 msg << "setToSize() not implemented for "
1310 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1311 throw RipleyException(msg.str());
1312 }
1313 }
1314
1315 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1316 bool reducedColOrder) const
1317 {
1318 if (reducedRowOrder || reducedColOrder)
1319 throw RipleyException("getPattern() not implemented for reduced order");
1320
1321 return m_pattern;
1322 }
1323
1324 void Brick::Print_Mesh_Info(const bool full) const
1325 {
1326 RipleyDomain::Print_Mesh_Info(full);
1327 if (full) {
1328 cout << " Id Coordinates" << endl;
1329 cout.precision(15);
1330 cout.setf(ios::scientific, ios::floatfield);
1331 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1332 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1333 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1334 for (index_t i=0; i < getNumNodes(); i++) {
1335 cout << " " << setw(5) << m_nodeId[i]
1336 << " " << xdx.first+(i%m_N0)*xdx.second
1337 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1338 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1339 }
1340 }
1341 }
1342
1343 IndexVector Brick::getNumNodesPerDim() const
1344 {
1345 IndexVector ret;
1346 ret.push_back(m_N0);
1347 ret.push_back(m_N1);
1348 ret.push_back(m_N2);
1349 return ret;
1350 }
1351
1352 IndexVector Brick::getNumElementsPerDim() const
1353 {
1354 IndexVector ret;
1355 ret.push_back(m_NE0);
1356 ret.push_back(m_NE1);
1357 ret.push_back(m_NE2);
1358 return ret;
1359 }
1360
1361 IndexVector Brick::getNumFacesPerBoundary() const
1362 {
1363 IndexVector ret(6, 0);
1364 //left
1365 if (m_offset0==0)
1366 ret[0]=m_NE1*m_NE2;
1367 //right
1368 if (m_mpiInfo->rank%m_NX==m_NX-1)
1369 ret[1]=m_NE1*m_NE2;
1370 //bottom
1371 if (m_offset1==0)
1372 ret[2]=m_NE0*m_NE2;
1373 //top
1374 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1375 ret[3]=m_NE0*m_NE2;
1376 //front
1377 if (m_offset2==0)
1378 ret[4]=m_NE0*m_NE1;
1379 //back
1380 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1381 ret[5]=m_NE0*m_NE1;
1382 return ret;
1383 }
1384
1385 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1386 {
1387 if (dim==0)
1388 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1389 else if (dim==1)
1390 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1391 else if (dim==2)
1392 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1393
1394 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1395 }
1396
1397 //protected
1398 dim_t Brick::getNumDOF() const
1399 {
1400 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1401 }
1402
1403 //protected
1404 dim_t Brick::getNumFaceElements() const
1405 {
1406 const IndexVector faces = getNumFacesPerBoundary();
1407 dim_t n=0;
1408 for (size_t i=0; i<faces.size(); i++)
1409 n+=faces[i];
1410 return n;
1411 }
1412
1413 //protected
1414 void Brick::assembleCoordinates(escript::Data& arg) const
1415 {
1416 escriptDataC x = arg.getDataC();
1417 int numDim = m_numDim;
1418 if (!isDataPointShapeEqual(&x, 1, &numDim))
1419 throw RipleyException("setToX: Invalid Data object shape");
1420 if (!numSamplesEqual(&x, 1, getNumNodes()))
1421 throw RipleyException("setToX: Illegal number of samples in Data object");
1422
1423 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1424 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1425 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1426 arg.requireWrite();
1427 #pragma omp parallel for
1428 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1429 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1430 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1431 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1432 point[0] = xdx.first+i0*xdx.second;
1433 point[1] = ydy.first+i1*ydy.second;
1434 point[2] = zdz.first+i2*zdz.second;
1435 }
1436 }
1437 }
1438 }
1439
1440 //protected
1441 dim_t Brick::insertNeighbourNodes(IndexVector& index, index_t node) const
1442 {
1443 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1444 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1445 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1446 const int x=node%nDOF0;
1447 const int y=node%(nDOF0*nDOF1)/nDOF0;
1448 const int z=node/(nDOF0*nDOF1);
1449 int num=0;
1450 // loop through potential neighbours and add to index if positions are
1451 // within bounds
1452 for (int i2=-1; i2<2; i2++) {
1453 for (int i1=-1; i1<2; i1++) {
1454 for (int i0=-1; i0<2; i0++) {
1455 // skip node itself
1456 if (i0==0 && i1==0 && i2==0)
1457 continue;
1458 // location of neighbour node
1459 const int nx=x+i0;
1460 const int ny=y+i1;
1461 const int nz=z+i2;
1462 if (nx>=0 && ny>=0 && nz>=0
1463 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1464 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1465 num++;
1466 }
1467 }
1468 }
1469 }
1470
1471 return num;
1472 }
1473
1474 //protected
1475 void Brick::nodesToDOF(escript::Data& out, escript::Data& in) const
1476 {
1477 const dim_t numComp = in.getDataPointSize();
1478 out.requireWrite();
1479
1480 const index_t left = (m_offset0==0 ? 0 : 1);
1481 const index_t bottom = (m_offset1==0 ? 0 : 1);
1482 const index_t front = (m_offset2==0 ? 0 : 1);
1483 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1484 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1485 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1486 #pragma omp parallel for
1487 for (index_t i=0; i<nDOF2; i++) {
1488 for (index_t j=0; j<nDOF1; j++) {
1489 for (index_t k=0; k<nDOF0; k++) {
1490 const index_t n=k+left+(j+bottom)*m_N0+(i+front)*m_N0*m_N1;
1491 const double* src=in.getSampleDataRO(n);
1492 copy(src, src+numComp, out.getSampleDataRW(k+j*nDOF0+i*nDOF0*nDOF1));
1493 }
1494 }
1495 }
1496 }
1497
1498 //protected
1499 void Brick::dofToNodes(escript::Data& out, escript::Data& in) const
1500 {
1501 const dim_t numComp = in.getDataPointSize();
1502 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1503 in.requireWrite();
1504 Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1505
1506 const dim_t numDOF = getNumDOF();
1507 out.requireWrite();
1508 const double* buffer = Paso_Coupler_finishCollect(coupler);
1509
1510 #pragma omp parallel for
1511 for (index_t i=0; i<getNumNodes(); i++) {
1512 const double* src=(m_dofMap[i]<numDOF ?
1513 in.getSampleDataRO(m_dofMap[i])
1514 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1515 copy(src, src+numComp, out.getSampleDataRW(i));
1516 }
1517 }
1518
1519 //private
1520 void Brick::populateSampleIds()
1521 {
1522 // identifiers are ordered from left to right, bottom to top, front to back
1523 // globally
1524
1525 // build node distribution vector first.
1526 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1527 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1528 const dim_t numDOF=getNumDOF();
1529 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1530 m_nodeDistribution[k]=k*numDOF;
1531 }
1532 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1533 m_nodeId.resize(getNumNodes());
1534 m_dofId.resize(numDOF);
1535 m_elementId.resize(getNumElements());
1536 m_faceId.resize(getNumFaceElements());
1537
1538 #pragma omp parallel
1539 {
1540 #pragma omp for nowait
1541 // nodes
1542 for (dim_t i2=0; i2<m_N2; i2++) {
1543 for (dim_t i1=0; i1<m_N1; i1++) {
1544 for (dim_t i0=0; i0<m_N0; i0++) {
1545 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1546 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1547 +(m_offset1+i1)*(m_gNE0+1)
1548 +m_offset0+i0;
1549 }
1550 }
1551 }
1552
1553 // degrees of freedom
1554 #pragma omp for nowait
1555 for (dim_t k=0; k<numDOF; k++)
1556 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1557
1558 // elements
1559 #pragma omp for nowait
1560 for (dim_t i2=0; i2<m_NE2; i2++) {
1561 for (dim_t i1=0; i1<m_NE1; i1++) {
1562 for (dim_t i0=0; i0<m_NE0; i0++) {
1563 m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1564 (m_offset2+i2)*m_gNE0*m_gNE1
1565 +(m_offset1+i1)*m_gNE0
1566 +m_offset0+i0;
1567 }
1568 }
1569 }
1570
1571 // face elements
1572 #pragma omp for
1573 for (dim_t k=0; k<getNumFaceElements(); k++)
1574 m_faceId[k]=k;
1575 } // end parallel section
1576
1577 m_nodeTags.assign(getNumNodes(), 0);
1578 updateTagsInUse(Nodes);
1579
1580 m_elementTags.assign(getNumElements(), 0);
1581 updateTagsInUse(Elements);
1582
1583 // generate face offset vector and set face tags
1584 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1585 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1586 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1587 m_faceOffset.assign(facesPerEdge.size(), -1);
1588 m_faceTags.clear();
1589 index_t offset=0;
1590 for (size_t i=0; i<facesPerEdge.size(); i++) {
1591 if (facesPerEdge[i]>0) {
1592 m_faceOffset[i]=offset;
1593 offset+=facesPerEdge[i];
1594 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1595 }
1596 }
1597 setTagMap("left", LEFT);
1598 setTagMap("right", RIGHT);
1599 setTagMap("bottom", BOTTOM);
1600 setTagMap("top", TOP);
1601 setTagMap("front", FRONT);
1602 setTagMap("back", BACK);
1603 updateTagsInUse(FaceElements);
1604 }
1605
1606 //private
1607 void Brick::createPattern()
1608 {
1609 const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1610 const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1611 const dim_t nDOF2 = (m_gNE2+1)/m_NZ;
1612 const index_t left = (m_offset0==0 ? 0 : 1);
1613 const index_t bottom = (m_offset1==0 ? 0 : 1);
1614 const index_t front = (m_offset2==0 ? 0 : 1);
1615
1616 // populate node->DOF mapping with own degrees of freedom.
1617 // The rest is assigned in the loop further down
1618 m_dofMap.assign(getNumNodes(), 0);
1619 #pragma omp parallel for
1620 for (index_t i=front; i<m_N2; i++) {
1621 for (index_t j=bottom; j<m_N1; j++) {
1622 for (index_t k=left; k<m_N0; k++) {
1623 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1624 }
1625 }
1626 }
1627
1628 // build list of shared components and neighbours by looping through
1629 // all potential neighbouring ranks and checking if positions are
1630 // within bounds
1631 const dim_t numDOF=nDOF0*nDOF1*nDOF2;
1632 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1633 RankVector neighbour;
1634 IndexVector offsetInShared(1,0);
1635 IndexVector sendShared, recvShared;
1636 int numShared=0;
1637 const int x=m_mpiInfo->rank%m_NX;
1638 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1639 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1640 for (int i2=-1; i2<2; i2++) {
1641 for (int i1=-1; i1<2; i1++) {
1642 for (int i0=-1; i0<2; i0++) {
1643 // skip this rank
1644 if (i0==0 && i1==0 && i2==0)
1645 continue;
1646 // location of neighbour rank
1647 const int nx=x+i0;
1648 const int ny=y+i1;
1649 const int nz=z+i2;
1650 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1651 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1652 if (i0==0 && i1==0) {
1653 // sharing front or back plane
1654 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1655 for (dim_t i=0; i<nDOF1; i++) {
1656 const int firstDOF=(i2==-1 ? i*nDOF0
1657 : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1658 const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1659 : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1660 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1661 sendShared.push_back(firstDOF+j);
1662 recvShared.push_back(numDOF+numShared);
1663 if (j>0) {
1664 if (i>0)
1665 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1666 colIndices[firstDOF+j-1].push_back(numShared);
1667 if (i<nDOF1-1)
1668 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1669 }
1670 if (i>0)
1671 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1672 colIndices[firstDOF+j].push_back(numShared);
1673 if (i<nDOF1-1)
1674 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1675 if (j<nDOF0-1) {
1676 if (i>0)
1677 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1678 colIndices[firstDOF+j+1].push_back(numShared);
1679 if (i<nDOF1-1)
1680 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1681 }
1682 m_dofMap[firstNode+j]=numDOF+numShared;
1683 }
1684 }
1685 } else if (i0==0 && i2==0) {
1686 // sharing top or bottom plane
1687 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1688 for (dim_t i=0; i<nDOF2; i++) {
1689 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1690 : nDOF0*((i+1)*nDOF1-1));
1691 const int firstNode=(i1==-1 ?
1692 left+(i+front)*m_N0*m_N1
1693 : left+m_N0*((i+1+front)*m_N1-1));
1694 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1695 sendShared.push_back(firstDOF+j);
1696 recvShared.push_back(numDOF+numShared);
1697 if (j>0) {
1698 if (i>0)
1699 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1700 colIndices[firstDOF+j-1].push_back(numShared);
1701 if (i<nDOF2-1)
1702 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1703 }
1704 if (i>0)
1705 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1706 colIndices[firstDOF+j].push_back(numShared);
1707 if (i<nDOF2-1)
1708 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1709 if (j<nDOF0-1) {
1710 if (i>0)
1711 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1712 colIndices[firstDOF+j+1].push_back(numShared);
1713 if (i<nDOF2-1)
1714 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1715 }
1716 m_dofMap[firstNode+j]=numDOF+numShared;
1717 }
1718 }
1719 } else if (i1==0 && i2==0) {
1720 // sharing left or right plane
1721 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1722 for (dim_t i=0; i<nDOF2; i++) {
1723 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1724 : nDOF0*(1+i*nDOF1)-1);
1725 const int firstNode=(i0==-1 ?
1726 (bottom+(i+front)*m_N1)*m_N0
1727 : (bottom+1+(i+front)*m_N1)*m_N0-1);
1728 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1729 sendShared.push_back(firstDOF+j*nDOF0);
1730 recvShared.push_back(numDOF+numShared);
1731 if (j>0) {
1732 if (i>0)
1733 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1734 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1735 if (i<nDOF2-1)
1736 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1737 }
1738 if (i>0)
1739 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1740 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1741 if (i<nDOF2-1)
1742 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1743 if (j<nDOF1-1) {
1744 if (i>0)
1745 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1746 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1747 if (i<nDOF2-1)
1748 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1749 }
1750 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1751 }
1752 }
1753 } else if (i0==0) {
1754 // sharing an edge in x direction
1755 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1756 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1757 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1758 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1759 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1760 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1761 sendShared.push_back(firstDOF+i);
1762 recvShared.push_back(numDOF+numShared);
1763 if (i>0)
1764 colIndices[firstDOF+i-1].push_back(numShared);
1765 colIndices[firstDOF+i].push_back(numShared);
1766 if (i<nDOF0-1)
1767 colIndices[firstDOF+i+1].push_back(numShared);
1768 m_dofMap[firstNode+i]=numDOF+numShared;
1769 }
1770 } else if (i1==0) {
1771 // sharing an edge in y direction
1772 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1773 const int firstDOF=(i0+1)/2*(nDOF0-1)
1774 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1775 const int firstNode=(i0+1)/2*(m_N0-1)
1776 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1777 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1778 sendShared.push_back(firstDOF+i*nDOF0);
1779 recvShared.push_back(numDOF+numShared);
1780 if (i>0)
1781 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1782 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1783 if (i<nDOF1-1)
1784 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1785 m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1786 }
1787 } else if (i2==0) {
1788 // sharing an edge in z direction
1789 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1790 const int firstDOF=(i0+1)/2*(nDOF0-1)
1791 +(i1+1)/2*nDOF0*(nDOF1-1);
1792 const int firstNode=(i0+1)/2*(m_N0-1)
1793 +(i1+1)/2*m_N0*(m_N1-1);
1794 for (dim_t i=0; i<nDOF2; i++, numShared++) {
1795 sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
1796 recvShared.push_back(numDOF+numShared);
1797 if (i>0)
1798 colIndices[firstDOF+(i-1)*nDOF0*nDOF1].push_back(numShared);
1799 colIndices[firstDOF+i*nDOF0*nDOF1].push_back(numShared);
1800 if (i<nDOF2-1)
1801 colIndices[firstDOF+(i+1)*nDOF0*nDOF1].push_back(numShared);
1802 m_dofMap[firstNode+i*m_N0*m_N1]=numDOF+numShared;
1803 }
1804 } else {
1805 // sharing a node
1806 const int dof=(i0+1)/2*(nDOF0-1)
1807 +(i1+1)/2*nDOF0*(nDOF1-1)
1808 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1809 const int node=(i0+1)/2*(m_N0-1)
1810 +(i1+1)/2*m_N0*(m_N1-1)
1811 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1812 offsetInShared.push_back(offsetInShared.back()+1);
1813 sendShared.push_back(dof);
1814 recvShared.push_back(numDOF+numShared);
1815 colIndices[dof].push_back(numShared);
1816 m_dofMap[node]=numDOF+numShared;
1817 ++numShared;
1818 }
1819 }
1820 }
1821 }
1822 }
1823
1824 // create connector
1825 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1826 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1827 &offsetInShared[0], 1, 0, m_mpiInfo);
1828 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1829 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1830 &offsetInShared[0], 1, 0, m_mpiInfo);
1831 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1832 Paso_SharedComponents_free(snd_shcomp);
1833 Paso_SharedComponents_free(rcv_shcomp);
1834
1835 // create main and couple blocks
1836 Paso_Pattern *mainPattern = createMainPattern();
1837 Paso_Pattern *colPattern, *rowPattern;
1838 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1839
1840 // allocate paso distribution
1841 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1842 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1843
1844 // finally create the system matrix
1845 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1846 distribution, distribution, mainPattern, colPattern, rowPattern,
1847 m_connector, m_connector);
1848
1849 Paso_Distribution_free(distribution);
1850
1851 // useful debug output
1852 /*
1853 cout << "--- rcv_shcomp ---" << endl;
1854 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1855 for (size_t i=0; i<neighbour.size(); i++) {
1856 cout << "neighbor[" << i << "]=" << neighbour[i]
1857 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1858 }
1859 for (size_t i=0; i<recvShared.size(); i++) {
1860 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1861 }
1862 cout << "--- snd_shcomp ---" << endl;
1863 for (size_t i=0; i<sendShared.size(); i++) {
1864 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1865 }
1866 cout << "--- dofMap ---" << endl;
1867 for (size_t i=0; i<m_dofMap.size(); i++) {
1868 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1869 }
1870 cout << "--- colIndices ---" << endl;
1871 for (size_t i=0; i<colIndices.size(); i++) {
1872 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1873 }
1874 */
1875
1876 /*
1877 cout << "--- main_pattern ---" << endl;
1878 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1879 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1880 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1881 }
1882 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1883 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1884 }
1885 */
1886
1887 /*
1888 cout << "--- colCouple_pattern ---" << endl;
1889 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1890 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1891 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1892 }
1893 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1894 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1895 }
1896 */
1897
1898 /*
1899 cout << "--- rowCouple_pattern ---" << endl;
1900 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1901 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1902 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1903 }
1904 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1905 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1906 }
1907 */
1908
1909 Paso_Pattern_free(mainPattern);
1910 Paso_Pattern_free(colPattern);
1911 Paso_Pattern_free(rowPattern);
1912 }
1913
1914 //protected
1915 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1916 bool reduced) const
1917 {
1918 const dim_t numComp = in.getDataPointSize();
1919 if (reduced) {
1920 out.requireWrite();
1921 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1922 const double c0 = .125;
1923 #pragma omp parallel for
1924 for (index_t k2=0; k2 < m_NE2; ++k2) {
1925 for (index_t k1=0; k1 < m_NE1; ++k1) {
1926 for (index_t k0=0; k0 < m_NE0; ++k0) {
1927 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1928 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1929 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1930 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1931 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1932 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1933 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1934 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1935 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1936 for (index_t i=0; i < numComp; ++i) {
1937 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1938 } /* end of component loop i */
1939 } /* end of k0 loop */
1940 } /* end of k1 loop */
1941 } /* end of k2 loop */
1942 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1943 } else {
1944 out.requireWrite();
1945 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1946 const double c0 = .0094373878376559314545;
1947 const double c1 = .035220810900864519624;
1948 const double c2 = .13144585576580214704;
1949 const double c3 = .49056261216234406855;
1950 #pragma omp parallel for
1951 for (index_t k2=0; k2 < m_NE2; ++k2) {
1952 for (index_t k1=0; k1 < m_NE1; ++k1) {
1953 for (index_t k0=0; k0 < m_NE0; ++k0) {
1954 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1955 const register double* f_001 = in.getSampleDa