/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3754 - (show annotations)
Wed Jan 4 07:07:37 2012 UTC (7 years, 2 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 379414 byte(s)
checkpoint. 3D single PDEs almost working

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 }
85
86
87 Brick::~Brick()
88 {
89 }
90
91 string Brick::getDescription() const
92 {
93 return "ripley::Brick";
94 }
95
96 bool Brick::operator==(const AbstractDomain& other) const
97 {
98 const Brick* o=dynamic_cast<const Brick*>(&other);
99 if (o) {
100 return (RipleyDomain::operator==(other) &&
101 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
102 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
103 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
104 }
105
106 return false;
107 }
108
109 void Brick::dump(const string& fileName) const
110 {
111 #if USE_SILO
112 string fn(fileName);
113 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
114 fn+=".silo";
115 }
116
117 const int NUM_SILO_FILES = 1;
118 const char* blockDirFmt = "/block%04d";
119 int driver=DB_HDF5;
120 string siloPath;
121 DBfile* dbfile = NULL;
122
123 #ifdef ESYS_MPI
124 PMPIO_baton_t* baton = NULL;
125 #endif
126
127 if (m_mpiInfo->size > 1) {
128 #ifdef ESYS_MPI
129 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
130 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
131 PMPIO_DefaultClose, (void*)&driver);
132 // try the fallback driver in case of error
133 if (!baton && driver != DB_PDB) {
134 driver = DB_PDB;
135 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
136 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
137 PMPIO_DefaultClose, (void*)&driver);
138 }
139 if (baton) {
140 char str[64];
141 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
142 siloPath = str;
143 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
144 }
145 #endif
146 } else {
147 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
148 getDescription().c_str(), driver);
149 // try the fallback driver in case of error
150 if (!dbfile && driver != DB_PDB) {
151 driver = DB_PDB;
152 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
153 getDescription().c_str(), driver);
154 }
155 }
156
157 if (!dbfile)
158 throw RipleyException("dump: Could not create Silo file");
159
160 /*
161 if (driver==DB_HDF5) {
162 // gzip level 1 already provides good compression with minimal
163 // performance penalty. Some tests showed that gzip levels >3 performed
164 // rather badly on escript data both in terms of time and space
165 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
166 }
167 */
168
169 boost::scoped_ptr<double> x(new double[m_N0]);
170 boost::scoped_ptr<double> y(new double[m_N1]);
171 boost::scoped_ptr<double> z(new double[m_N2]);
172 double* coords[3] = { x.get(), y.get(), z.get() };
173 pair<double,double> xdx = getFirstCoordAndSpacing(0);
174 pair<double,double> ydy = getFirstCoordAndSpacing(1);
175 pair<double,double> zdz = getFirstCoordAndSpacing(2);
176 #pragma omp parallel
177 {
178 #pragma omp for
179 for (dim_t i0 = 0; i0 < m_N0; i0++) {
180 coords[0][i0]=xdx.first+i0*xdx.second;
181 }
182 #pragma omp for
183 for (dim_t i1 = 0; i1 < m_N1; i1++) {
184 coords[1][i1]=ydy.first+i1*ydy.second;
185 }
186 #pragma omp for
187 for (dim_t i2 = 0; i2 < m_N2; i2++) {
188 coords[2][i2]=zdz.first+i2*zdz.second;
189 }
190 }
191 IndexVector dims = getNumNodesPerDim();
192 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
193 DB_COLLINEAR, NULL);
194
195 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
196 NULL, 0, DB_INT, DB_NODECENT, NULL);
197
198 // write element ids
199 dims = getNumElementsPerDim();
200 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
201 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
202
203 // rank 0 writes multimesh and multivar
204 if (m_mpiInfo->rank == 0) {
205 vector<string> tempstrings;
206 vector<char*> names;
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
214 DBSetDir(dbfile, "/");
215 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
216 &types[0], NULL);
217 tempstrings.clear();
218 names.clear();
219 for (dim_t i=0; i<m_mpiInfo->size; i++) {
220 stringstream path;
221 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
222 tempstrings.push_back(path.str());
223 names.push_back((char*)tempstrings.back().c_str());
224 }
225 types.assign(m_mpiInfo->size, DB_QUADVAR);
226 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
227 &types[0], NULL);
228 tempstrings.clear();
229 names.clear();
230 for (dim_t i=0; i<m_mpiInfo->size; i++) {
231 stringstream path;
232 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
233 tempstrings.push_back(path.str());
234 names.push_back((char*)tempstrings.back().c_str());
235 }
236 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
237 &types[0], NULL);
238 }
239
240 if (m_mpiInfo->size > 1) {
241 #ifdef ESYS_MPI
242 PMPIO_HandOffBaton(baton, dbfile);
243 PMPIO_Finish(baton);
244 #endif
245 } else {
246 DBClose(dbfile);
247 }
248
249 #else // USE_SILO
250 throw RipleyException("dump(): no Silo support");
251 #endif
252 }
253
254 const int* Brick::borrowSampleReferenceIDs(int fsType) const
255 {
256 switch (fsType) {
257 case Nodes:
258 case ReducedNodes: //FIXME: reduced
259 return &m_nodeId[0];
260 case DegreesOfFreedom: //FIXME
261 case ReducedDegreesOfFreedom: //FIXME
262 return &m_dofId[0];
263 case Elements:
264 case ReducedElements:
265 return &m_elementId[0];
266 case ReducedFaceElements:
267 return &m_faceId[0];
268 default:
269 break;
270 }
271
272 stringstream msg;
273 msg << "borrowSampleReferenceIDs() not implemented for function space type "
274 << fsType;
275 throw RipleyException(msg.str());
276 }
277
278 bool Brick::ownSample(int fsCode, index_t id) const
279 {
280 #ifdef ESYS_MPI
281 if (fsCode == Nodes) {
282 const index_t left = (m_offset0==0 ? 0 : 1);
283 const index_t bottom = (m_offset1==0 ? 0 : 1);
284 const index_t front = (m_offset2==0 ? 0 : 1);
285 const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
286 const index_t top = (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1 ? m_N1 : m_N1-1);
287 const index_t back = (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1 ? m_N2 : m_N2-1);
288 const index_t x=id%m_N0;
289 const index_t y=id%(m_N0*m_N1)/m_N0;
290 const index_t z=id/(m_N0*m_N1);
291 return (x>=left && x<right && y>=bottom && y<top && z>=front && z<back);
292
293 } else {
294 stringstream msg;
295 msg << "ownSample() not implemented for "
296 << functionSpaceTypeAsString(fsCode);
297 throw RipleyException(msg.str());
298 }
299 #else
300 return true;
301 #endif
302 }
303
304 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
305 {
306 escript::Data& in = *const_cast<escript::Data*>(&cIn);
307 const dim_t numComp = in.getDataPointSize();
308 const double h0 = m_l0/m_gNE0;
309 const double h1 = m_l1/m_gNE1;
310 const double h2 = m_l1/m_gNE2;
311 const double C0 = .044658198738520451079;
312 const double C1 = .16666666666666666667;
313 const double C2 = .21132486540518711775;
314 const double C3 = .25;
315 const double C4 = .5;
316 const double C5 = .62200846792814621559;
317 const double C6 = .78867513459481288225;
318
319 if (out.getFunctionSpace().getTypeCode() == Elements) {
320 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
321 #pragma omp parallel for
322 for (index_t k2=0; k2 < m_NE2; ++k2) {
323 for (index_t k1=0; k1 < m_NE1; ++k1) {
324 for (index_t k0=0; k0 < m_NE0; ++k0) {
325 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
326 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
327 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
328 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
329 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
330 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
331 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
332 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
333 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
334 for (index_t i=0; i < numComp; ++i) {
335 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
336 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
337 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
338 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
339 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
340 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
341 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
342 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
343 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
344 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
345 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
346 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
347 o[INDEX3(i,0,0,numComp,3)] = V0;
348 o[INDEX3(i,1,0,numComp,3)] = V4;
349 o[INDEX3(i,2,0,numComp,3)] = V8;
350 o[INDEX3(i,0,1,numComp,3)] = V0;
351 o[INDEX3(i,1,1,numComp,3)] = V5;
352 o[INDEX3(i,2,1,numComp,3)] = V9;
353 o[INDEX3(i,0,2,numComp,3)] = V1;
354 o[INDEX3(i,1,2,numComp,3)] = V4;
355 o[INDEX3(i,2,2,numComp,3)] = V10;
356 o[INDEX3(i,0,3,numComp,3)] = V1;
357 o[INDEX3(i,1,3,numComp,3)] = V5;
358 o[INDEX3(i,2,3,numComp,3)] = V11;
359 o[INDEX3(i,0,4,numComp,3)] = V2;
360 o[INDEX3(i,1,4,numComp,3)] = V6;
361 o[INDEX3(i,2,4,numComp,3)] = V8;
362 o[INDEX3(i,0,5,numComp,3)] = V2;
363 o[INDEX3(i,1,5,numComp,3)] = V7;
364 o[INDEX3(i,2,5,numComp,3)] = V9;
365 o[INDEX3(i,0,6,numComp,3)] = V3;
366 o[INDEX3(i,1,6,numComp,3)] = V6;
367 o[INDEX3(i,2,6,numComp,3)] = V10;
368 o[INDEX3(i,0,7,numComp,3)] = V3;
369 o[INDEX3(i,1,7,numComp,3)] = V7;
370 o[INDEX3(i,2,7,numComp,3)] = V11;
371 } /* end of component loop i */
372 } /* end of k0 loop */
373 } /* end of k1 loop */
374 } /* end of k2 loop */
375 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378 #pragma omp parallel for
379 for (index_t k2=0; k2 < m_NE2; ++k2) {
380 for (index_t k1=0; k1 < m_NE1; ++k1) {
381 for (index_t k0=0; k0 < m_NE0; ++k0) {
382 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
383 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
384 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
385 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
386 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
387 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
388 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
389 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
390 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
391 for (index_t i=0; i < numComp; ++i) {
392 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
393 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
394 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
395 } /* end of component loop i */
396 } /* end of k0 loop */
397 } /* end of k1 loop */
398 } /* end of k2 loop */
399 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
400 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
401 /*** GENERATOR SNIP_GRAD_FACES TOP */
402 #pragma omp parallel
403 {
404 if (m_faceOffset[0] > -1) {
405 #pragma omp for nowait
406 for (index_t k2=0; k2 < m_NE2; ++k2) {
407 for (index_t k1=0; k1 < m_NE1; ++k1) {
408 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
409 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
410 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
411 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
412 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
413 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
414 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
415 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
416 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
417 for (index_t i=0; i < numComp; ++i) {
418 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
419 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
420 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
421 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
422 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423 o[INDEX3(i,1,0,numComp,3)] = V0;
424 o[INDEX3(i,2,0,numComp,3)] = V2;
425 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426 o[INDEX3(i,1,1,numComp,3)] = V0;
427 o[INDEX3(i,2,1,numComp,3)] = V3;
428 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429 o[INDEX3(i,1,2,numComp,3)] = V1;
430 o[INDEX3(i,2,2,numComp,3)] = V2;
431 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
432 o[INDEX3(i,1,3,numComp,3)] = V1;
433 o[INDEX3(i,2,3,numComp,3)] = V3;
434 } /* end of component loop i */
435 } /* end of k1 loop */
436 } /* end of k2 loop */
437 } /* end of face 0 */
438 if (m_faceOffset[1] > -1) {
439 #pragma omp for nowait
440 for (index_t k2=0; k2 < m_NE2; ++k2) {
441 for (index_t k1=0; k1 < m_NE1; ++k1) {
442 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
443 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
444 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
445 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
446 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
447 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
448 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
449 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
450 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
451 for (index_t i=0; i < numComp; ++i) {
452 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
453 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
454 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
455 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
456 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457 o[INDEX3(i,1,0,numComp,3)] = V0;
458 o[INDEX3(i,2,0,numComp,3)] = V2;
459 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460 o[INDEX3(i,1,1,numComp,3)] = V0;
461 o[INDEX3(i,2,1,numComp,3)] = V3;
462 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463 o[INDEX3(i,1,2,numComp,3)] = V1;
464 o[INDEX3(i,2,2,numComp,3)] = V2;
465 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466 o[INDEX3(i,1,3,numComp,3)] = V1;
467 o[INDEX3(i,2,3,numComp,3)] = V3;
468 } /* end of component loop i */
469 } /* end of k1 loop */
470 } /* end of k2 loop */
471 } /* end of face 1 */
472 if (m_faceOffset[2] > -1) {
473 #pragma omp for nowait
474 for (index_t k2=0; k2 < m_NE2; ++k2) {
475 for (index_t k0=0; k0 < m_NE0; ++k0) {
476 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
477 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
478 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
479 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
480 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
481 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
482 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
483 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
484 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
485 for (index_t i=0; i < numComp; ++i) {
486 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
487 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
488 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
489 o[INDEX3(i,0,0,numComp,3)] = V0;
490 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491 o[INDEX3(i,2,0,numComp,3)] = V1;
492 o[INDEX3(i,0,1,numComp,3)] = V0;
493 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494 o[INDEX3(i,2,1,numComp,3)] = V2;
495 o[INDEX3(i,0,2,numComp,3)] = V0;
496 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497 o[INDEX3(i,2,2,numComp,3)] = V1;
498 o[INDEX3(i,0,3,numComp,3)] = V0;
499 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
500 o[INDEX3(i,2,3,numComp,3)] = V2;
501 } /* end of component loop i */
502 } /* end of k0 loop */
503 } /* end of k2 loop */
504 } /* end of face 2 */
505 if (m_faceOffset[3] > -1) {
506 #pragma omp for nowait
507 for (index_t k2=0; k2 < m_NE2; ++k2) {
508 for (index_t k0=0; k0 < m_NE0; ++k0) {
509 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
510 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
511 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
512 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
513 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
514 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
515 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
516 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
517 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
518 for (index_t i=0; i < numComp; ++i) {
519 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
520 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
521 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
522 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
523 o[INDEX3(i,0,0,numComp,3)] = V0;
524 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525 o[INDEX3(i,2,0,numComp,3)] = V2;
526 o[INDEX3(i,0,1,numComp,3)] = V0;
527 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528 o[INDEX3(i,2,1,numComp,3)] = V3;
529 o[INDEX3(i,0,2,numComp,3)] = V1;
530 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531 o[INDEX3(i,2,2,numComp,3)] = V2;
532 o[INDEX3(i,0,3,numComp,3)] = V1;
533 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534 o[INDEX3(i,2,3,numComp,3)] = V3;
535 } /* end of component loop i */
536 } /* end of k0 loop */
537 } /* end of k2 loop */
538 } /* end of face 3 */
539 if (m_faceOffset[4] > -1) {
540 #pragma omp for nowait
541 for (index_t k1=0; k1 < m_NE1; ++k1) {
542 for (index_t k0=0; k0 < m_NE0; ++k0) {
543 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
544 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
545 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
546 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
547 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
548 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
549 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
550 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
551 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
552 for (index_t i=0; i < numComp; ++i) {
553 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
554 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
555 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
556 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
557 o[INDEX3(i,0,0,numComp,3)] = V0;
558 o[INDEX3(i,1,0,numComp,3)] = V2;
559 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 o[INDEX3(i,0,1,numComp,3)] = V0;
561 o[INDEX3(i,1,1,numComp,3)] = V3;
562 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563 o[INDEX3(i,0,2,numComp,3)] = V1;
564 o[INDEX3(i,1,2,numComp,3)] = V2;
565 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566 o[INDEX3(i,0,3,numComp,3)] = V1;
567 o[INDEX3(i,1,3,numComp,3)] = V3;
568 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
569 } /* end of component loop i */
570 } /* end of k0 loop */
571 } /* end of k1 loop */
572 } /* end of face 4 */
573 if (m_faceOffset[5] > -1) {
574 #pragma omp for nowait
575 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 for (index_t k0=0; k0 < m_NE0; ++k0) {
577 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
578 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
579 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
580 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
581 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
582 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
583 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
584 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
585 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
586 for (index_t i=0; i < numComp; ++i) {
587 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
588 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
589 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
590 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
591 o[INDEX3(i,0,0,numComp,3)] = V0;
592 o[INDEX3(i,1,0,numComp,3)] = V2;
593 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 o[INDEX3(i,0,1,numComp,3)] = V0;
595 o[INDEX3(i,1,1,numComp,3)] = V3;
596 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597 o[INDEX3(i,0,2,numComp,3)] = V1;
598 o[INDEX3(i,1,2,numComp,3)] = V2;
599 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600 o[INDEX3(i,0,3,numComp,3)] = V1;
601 o[INDEX3(i,1,3,numComp,3)] = V3;
602 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 } /* end of component loop i */
604 } /* end of k0 loop */
605 } /* end of k1 loop */
606 } /* end of face 5 */
607 } // end of parallel section
608 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
609 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
610 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
611 #pragma omp parallel
612 {
613 if (m_faceOffset[0] > -1) {
614 #pragma omp for nowait
615 for (index_t k2=0; k2 < m_NE2; ++k2) {
616 for (index_t k1=0; k1 < m_NE1; ++k1) {
617 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
618 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
619 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
620 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
621 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
622 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
623 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
624 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
625 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
626 for (index_t i=0; i < numComp; ++i) {
627 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
628 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
629 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
630 } /* end of component loop i */
631 } /* end of k1 loop */
632 } /* end of k2 loop */
633 } /* end of face 0 */
634 if (m_faceOffset[1] > -1) {
635 #pragma omp for nowait
636 for (index_t k2=0; k2 < m_NE2; ++k2) {
637 for (index_t k1=0; k1 < m_NE1; ++k1) {
638 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
639 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
640 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
641 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
642 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
643 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
644 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
645 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
646 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
647 for (index_t i=0; i < numComp; ++i) {
648 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
649 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
650 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
651 } /* end of component loop i */
652 } /* end of k1 loop */
653 } /* end of k2 loop */
654 } /* end of face 1 */
655 if (m_faceOffset[2] > -1) {
656 #pragma omp for nowait
657 for (index_t k2=0; k2 < m_NE2; ++k2) {
658 for (index_t k0=0; k0 < m_NE0; ++k0) {
659 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
660 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
661 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
662 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
663 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
664 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
665 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
666 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
667 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
668 for (index_t i=0; i < numComp; ++i) {
669 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
670 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
671 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
672 } /* end of component loop i */
673 } /* end of k0 loop */
674 } /* end of k2 loop */
675 } /* end of face 2 */
676 if (m_faceOffset[3] > -1) {
677 #pragma omp for nowait
678 for (index_t k2=0; k2 < m_NE2; ++k2) {
679 for (index_t k0=0; k0 < m_NE0; ++k0) {
680 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
681 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
682 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
683 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
684 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
685 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
686 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
687 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
688 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
689 for (index_t i=0; i < numComp; ++i) {
690 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
691 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
692 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
693 } /* end of component loop i */
694 } /* end of k0 loop */
695 } /* end of k2 loop */
696 } /* end of face 3 */
697 if (m_faceOffset[4] > -1) {
698 #pragma omp for nowait
699 for (index_t k1=0; k1 < m_NE1; ++k1) {
700 for (index_t k0=0; k0 < m_NE0; ++k0) {
701 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
702 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
703 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
704 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
705 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
706 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
707 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
708 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
709 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
710 for (index_t i=0; i < numComp; ++i) {
711 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
712 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
713 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
714 } /* end of component loop i */
715 } /* end of k0 loop */
716 } /* end of k1 loop */
717 } /* end of face 4 */
718 if (m_faceOffset[5] > -1) {
719 #pragma omp for nowait
720 for (index_t k1=0; k1 < m_NE1; ++k1) {
721 for (index_t k0=0; k0 < m_NE0; ++k0) {
722 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
723 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
724 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
725 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
726 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
727 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
728 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
729 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
730 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
731 for (index_t i=0; i < numComp; ++i) {
732 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
733 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
734 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
735 } /* end of component loop i */
736 } /* end of k0 loop */
737 } /* end of k1 loop */
738 } /* end of face 5 */
739 } // end of parallel section
740 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
741 } else {
742 stringstream msg;
743 msg << "setToGradient() not implemented for "
744 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
745 throw RipleyException(msg.str());
746 }
747 }
748
749 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
750 {
751 escript::Data& in = *const_cast<escript::Data*>(&arg);
752 const dim_t numComp = in.getDataPointSize();
753 const double h0 = m_l0/m_gNE0;
754 const double h1 = m_l1/m_gNE1;
755 const double h2 = m_l2/m_gNE2;
756 if (arg.getFunctionSpace().getTypeCode() == Elements) {
757 const double w_0 = h0*h1*h2/8.;
758 #pragma omp parallel
759 {
760 vector<double> int_local(numComp, 0);
761 #pragma omp for nowait
762 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
763 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
764 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
765 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
766 for (index_t i=0; i < numComp; ++i) {
767 const register double f_0 = f[INDEX2(i,0,numComp)];
768 const register double f_1 = f[INDEX2(i,1,numComp)];
769 const register double f_2 = f[INDEX2(i,2,numComp)];
770 const register double f_3 = f[INDEX2(i,3,numComp)];
771 const register double f_4 = f[INDEX2(i,4,numComp)];
772 const register double f_5 = f[INDEX2(i,5,numComp)];
773 const register double f_6 = f[INDEX2(i,6,numComp)];
774 const register double f_7 = f[INDEX2(i,7,numComp)];
775 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
776 } /* end of component loop i */
777 } /* end of k0 loop */
778 } /* end of k1 loop */
779 } /* end of k2 loop */
780
781 #pragma omp critical
782 for (index_t i=0; i<numComp; i++)
783 integrals[i]+=int_local[i];
784 } // end of parallel section
785 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
786 const double w_0 = h0*h1*h2;
787 #pragma omp parallel
788 {
789 vector<double> int_local(numComp, 0);
790 #pragma omp for nowait
791 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
792 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
793 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
794 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
795 for (index_t i=0; i < numComp; ++i) {
796 int_local[i]+=f[i]*w_0;
797 } /* end of component loop i */
798 } /* end of k0 loop */
799 } /* end of k1 loop */
800 } /* end of k2 loop */
801
802 #pragma omp critical
803 for (index_t i=0; i<numComp; i++)
804 integrals[i]+=int_local[i];
805 } // end of parallel section
806 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
807 const double w_0 = h1*h2/4.;
808 const double w_1 = h0*h2/4.;
809 const double w_2 = h0*h1/4.;
810 #pragma omp parallel
811 {
812 vector<double> int_local(numComp, 0);
813 if (m_faceOffset[0] > -1) {
814 #pragma omp for nowait
815 for (index_t k2=0; k2 < m_NE2; ++k2) {
816 for (index_t k1=0; k1 < m_NE1; ++k1) {
817 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
818 for (index_t i=0; i < numComp; ++i) {
819 const register double f_0 = f[INDEX2(i,0,numComp)];
820 const register double f_1 = f[INDEX2(i,1,numComp)];
821 const register double f_2 = f[INDEX2(i,2,numComp)];
822 const register double f_3 = f[INDEX2(i,3,numComp)];
823 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
824 } /* end of component loop i */
825 } /* end of k1 loop */
826 } /* end of k2 loop */
827 }
828
829 if (m_faceOffset[1] > -1) {
830 #pragma omp for nowait
831 for (index_t k2=0; k2 < m_NE2; ++k2) {
832 for (index_t k1=0; k1 < m_NE1; ++k1) {
833 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
834 for (index_t i=0; i < numComp; ++i) {
835 const register double f_0 = f[INDEX2(i,0,numComp)];
836 const register double f_1 = f[INDEX2(i,1,numComp)];
837 const register double f_2 = f[INDEX2(i,2,numComp)];
838 const register double f_3 = f[INDEX2(i,3,numComp)];
839 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
840 } /* end of component loop i */
841 } /* end of k1 loop */
842 } /* end of k2 loop */
843 }
844
845 if (m_faceOffset[2] > -1) {
846 #pragma omp for nowait
847 for (index_t k2=0; k2 < m_NE2; ++k2) {
848 for (index_t k0=0; k0 < m_NE0; ++k0) {
849 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
850 for (index_t i=0; i < numComp; ++i) {
851 const register double f_0 = f[INDEX2(i,0,numComp)];
852 const register double f_1 = f[INDEX2(i,1,numComp)];
853 const register double f_2 = f[INDEX2(i,2,numComp)];
854 const register double f_3 = f[INDEX2(i,3,numComp)];
855 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
856 } /* end of component loop i */
857 } /* end of k1 loop */
858 } /* end of k2 loop */
859 }
860
861 if (m_faceOffset[3] > -1) {
862 #pragma omp for nowait
863 for (index_t k2=0; k2 < m_NE2; ++k2) {
864 for (index_t k0=0; k0 < m_NE0; ++k0) {
865 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
866 for (index_t i=0; i < numComp; ++i) {
867 const register double f_0 = f[INDEX2(i,0,numComp)];
868 const register double f_1 = f[INDEX2(i,1,numComp)];
869 const register double f_2 = f[INDEX2(i,2,numComp)];
870 const register double f_3 = f[INDEX2(i,3,numComp)];
871 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
872 } /* end of component loop i */
873 } /* end of k1 loop */
874 } /* end of k2 loop */
875 }
876
877 if (m_faceOffset[4] > -1) {
878 #pragma omp for nowait
879 for (index_t k1=0; k1 < m_NE1; ++k1) {
880 for (index_t k0=0; k0 < m_NE0; ++k0) {
881 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
882 for (index_t i=0; i < numComp; ++i) {
883 const register double f_0 = f[INDEX2(i,0,numComp)];
884 const register double f_1 = f[INDEX2(i,1,numComp)];
885 const register double f_2 = f[INDEX2(i,2,numComp)];
886 const register double f_3 = f[INDEX2(i,3,numComp)];
887 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
888 } /* end of component loop i */
889 } /* end of k1 loop */
890 } /* end of k2 loop */
891 }
892
893 if (m_faceOffset[5] > -1) {
894 #pragma omp for nowait
895 for (index_t k1=0; k1 < m_NE1; ++k1) {
896 for (index_t k0=0; k0 < m_NE0; ++k0) {
897 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
898 for (index_t i=0; i < numComp; ++i) {
899 const register double f_0 = f[INDEX2(i,0,numComp)];
900 const register double f_1 = f[INDEX2(i,1,numComp)];
901 const register double f_2 = f[INDEX2(i,2,numComp)];
902 const register double f_3 = f[INDEX2(i,3,numComp)];
903 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
904 } /* end of component loop i */
905 } /* end of k1 loop */
906 } /* end of k2 loop */
907 }
908
909 #pragma omp critical
910 for (index_t i=0; i<numComp; i++)
911 integrals[i]+=int_local[i];
912 } // end of parallel section
913
914 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
915 const double w_0 = h1*h2;
916 const double w_1 = h0*h2;
917 const double w_2 = h0*h1;
918 #pragma omp parallel
919 {
920 vector<double> int_local(numComp, 0);
921 if (m_faceOffset[0] > -1) {
922 #pragma omp for nowait
923 for (index_t k2=0; k2 < m_NE2; ++k2) {
924 for (index_t k1=0; k1 < m_NE1; ++k1) {
925 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
926 for (index_t i=0; i < numComp; ++i) {
927 int_local[i]+=f[i]*w_0;
928 } /* end of component loop i */
929 } /* end of k1 loop */
930 } /* end of k2 loop */
931 }
932
933 if (m_faceOffset[1] > -1) {
934 #pragma omp for nowait
935 for (index_t k2=0; k2 < m_NE2; ++k2) {
936 for (index_t k1=0; k1 < m_NE1; ++k1) {
937 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
938 for (index_t i=0; i < numComp; ++i) {
939 int_local[i]+=f[i]*w_0;
940 } /* end of component loop i */
941 } /* end of k1 loop */
942 } /* end of k2 loop */
943 }
944
945 if (m_faceOffset[2] > -1) {
946 #pragma omp for nowait
947 for (index_t k2=0; k2 < m_NE2; ++k2) {
948 for (index_t k0=0; k0 < m_NE0; ++k0) {
949 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
950 for (index_t i=0; i < numComp; ++i) {
951 int_local[i]+=f[i]*w_1;
952 } /* end of component loop i */
953 } /* end of k1 loop */
954 } /* end of k2 loop */
955 }
956
957 if (m_faceOffset[3] > -1) {
958 #pragma omp for nowait
959 for (index_t k2=0; k2 < m_NE2; ++k2) {
960 for (index_t k0=0; k0 < m_NE0; ++k0) {
961 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
962 for (index_t i=0; i < numComp; ++i) {
963 int_local[i]+=f[i]*w_1;
964 } /* end of component loop i */
965 } /* end of k1 loop */
966 } /* end of k2 loop */
967 }
968
969 if (m_faceOffset[4] > -1) {
970 #pragma omp for nowait
971 for (index_t k1=0; k1 < m_NE1; ++k1) {
972 for (index_t k0=0; k0 < m_NE0; ++k0) {
973 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
974 for (index_t i=0; i < numComp; ++i) {
975 int_local[i]+=f[i]*w_2;
976 } /* end of component loop i */
977 } /* end of k1 loop */
978 } /* end of k2 loop */
979 }
980
981 if (m_faceOffset[5] > -1) {
982 #pragma omp for nowait
983 for (index_t k1=0; k1 < m_NE1; ++k1) {
984 for (index_t k0=0; k0 < m_NE0; ++k0) {
985 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
986 for (index_t i=0; i < numComp; ++i) {
987 int_local[i]+=f[i]*w_2;
988 } /* end of component loop i */
989 } /* end of k1 loop */
990 } /* end of k2 loop */
991 }
992
993 #pragma omp critical
994 for (index_t i=0; i<numComp; i++)
995 integrals[i]+=int_local[i];
996 } // end of parallel section
997
998 } else {
999 stringstream msg;
1000 msg << "setToIntegrals() not implemented for "
1001 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1002 throw RipleyException(msg.str());
1003 }
1004 }
1005
1006 void Brick::setToNormal(escript::Data& out) const
1007 {
1008 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1009 #pragma omp parallel
1010 {
1011 if (m_faceOffset[0] > -1) {
1012 #pragma omp for nowait
1013 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1014 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1015 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1016 // set vector at four quadrature points
1017 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1018 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1019 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1020 *o++ = -1.; *o++ = 0.; *o = 0.;
1021 }
1022 }
1023 }
1024
1025 if (m_faceOffset[1] > -1) {
1026 #pragma omp for nowait
1027 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1028 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1029 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1030 // set vector at four quadrature points
1031 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1032 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1033 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1034 *o++ = 1.; *o++ = 0.; *o = 0.;
1035 }
1036 }
1037 }
1038
1039 if (m_faceOffset[2] > -1) {
1040 #pragma omp for nowait
1041 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1042 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1043 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1044 // set vector at four quadrature points
1045 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1046 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1047 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1048 *o++ = 0.; *o++ = -1.; *o = 0.;
1049 }
1050 }
1051 }
1052
1053 if (m_faceOffset[3] > -1) {
1054 #pragma omp for nowait
1055 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1056 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1057 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1058 // set vector at four quadrature points
1059 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1060 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1061 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1062 *o++ = 0.; *o++ = 1.; *o = 0.;
1063 }
1064 }
1065 }
1066
1067 if (m_faceOffset[4] > -1) {
1068 #pragma omp for nowait
1069 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1070 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1071 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1072 // set vector at four quadrature points
1073 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1074 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1075 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1076 *o++ = 0.; *o++ = 0.; *o = -1.;
1077 }
1078 }
1079 }
1080
1081 if (m_faceOffset[5] > -1) {
1082 #pragma omp for nowait
1083 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1084 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1085 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1086 // set vector at four quadrature points
1087 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1088 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1089 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1090 *o++ = 0.; *o++ = 0.; *o = 1.;
1091 }
1092 }
1093 }
1094 } // end of parallel section
1095 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1096 #pragma omp parallel
1097 {
1098 if (m_faceOffset[0] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1101 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1102 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1103 *o++ = -1.;
1104 *o++ = 0.;
1105 *o = 0.;
1106 }
1107 }
1108 }
1109
1110 if (m_faceOffset[1] > -1) {
1111 #pragma omp for nowait
1112 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1113 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1114 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1115 *o++ = 1.;
1116 *o++ = 0.;
1117 *o = 0.;
1118 }
1119 }
1120 }
1121
1122 if (m_faceOffset[2] > -1) {
1123 #pragma omp for nowait
1124 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1125 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1126 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1127 *o++ = 0.;
1128 *o++ = -1.;
1129 *o = 0.;
1130 }
1131 }
1132 }
1133
1134 if (m_faceOffset[3] > -1) {
1135 #pragma omp for nowait
1136 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1137 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1138 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1139 *o++ = 0.;
1140 *o++ = 1.;
1141 *o = 0.;
1142 }
1143 }
1144 }
1145
1146 if (m_faceOffset[4] > -1) {
1147 #pragma omp for nowait
1148 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1149 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1150 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1151 *o++ = 0.;
1152 *o++ = 0.;
1153 *o = -1.;
1154 }
1155 }
1156 }
1157
1158 if (m_faceOffset[5] > -1) {
1159 #pragma omp for nowait
1160 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1161 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1162 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1163 *o++ = 0.;
1164 *o++ = 0.;
1165 *o = 1.;
1166 }
1167 }
1168 }
1169 } // end of parallel section
1170
1171 } else {
1172 stringstream msg;
1173 msg << "setToNormal() not implemented for "
1174 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1175 throw RipleyException(msg.str());
1176 }
1177 }
1178
1179 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1180 bool reducedColOrder) const
1181 {
1182 if (reducedRowOrder || reducedColOrder)
1183 throw RipleyException("getPattern() not implemented for reduced order");
1184
1185 // connector
1186 RankVector neighbour;
1187 IndexVector offsetInShared(1,0);
1188 IndexVector sendShared, recvShared;
1189 const IndexVector faces=getNumFacesPerBoundary();
1190 const index_t nDOF0 = (m_gNE0+1)/m_NX;
1191 const index_t nDOF1 = (m_gNE1+1)/m_NY;
1192 const index_t nDOF2 = (m_gNE2+1)/m_NZ;
1193 const int numDOF=nDOF0*nDOF1*nDOF2;
1194 const index_t left = (m_offset0==0 ? 0 : 1);
1195 const index_t bottom = (m_offset1==0 ? 0 : 1);
1196 const index_t front = (m_offset2==0 ? 0 : 1);
1197 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1198 int numShared=0;
1199
1200 m_dofMap.assign(getNumNodes(), 0);
1201 #pragma omp parallel for
1202 for (index_t i=front; i<m_N2; i++) {
1203 for (index_t j=bottom; j<m_N1; j++) {
1204 for (index_t k=left; k<m_N0; k++) {
1205 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1206 }
1207 }
1208 }
1209
1210 // build list of shared components and neighbours by looping through
1211 // all potential neighbouring ranks and checking if positions are
1212 // within bounds
1213 const int x=m_mpiInfo->rank%m_NX;
1214 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1215 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1216 for (int i2=-1; i2<2; i2++) {
1217 for (int i1=-1; i1<2; i1++) {
1218 for (int i0=-1; i0<2; i0++) {
1219 // skip rank itself
1220 if (i0==0 && i1==0 && i2==0)
1221 continue;
1222 // location of neighbour rank
1223 const int nx=x+i0;
1224 const int ny=y+i1;
1225 const int nz=z+i2;
1226 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1227 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1228 if (i0==0 && i1==0) {
1229 // sharing front or back plane
1230 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1231 for (dim_t i=0; i<nDOF1; i++) {
1232 const int firstDOF=(i2==-1 ? i*nDOF0 : nDOF0*nDOF1*(nDOF2-1)+i*nDOF0);
1233 const int firstNode=(i2==-1 ? left+i*m_N0 : left+m_N0*m_N1*(m_N2-1)+i*m_N0);
1234 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1235 sendShared.push_back(firstDOF+j);
1236 recvShared.push_back(numDOF+numShared);
1237 if (j>0) {
1238 if (i>0)
1239 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1240 colIndices[firstDOF+j-1].push_back(numShared);
1241 if (i<nDOF1-1)
1242 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1243 }
1244 if (i>0)
1245 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1246 colIndices[firstDOF+j].push_back(numShared);
1247 if (i<nDOF1-1)
1248 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1249 if (j<nDOF0-1) {
1250 if (i>0)
1251 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1252 colIndices[firstDOF+j+1].push_back(numShared);
1253 if (i<nDOF1-1)
1254 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1255 }
1256 m_dofMap[firstNode+j]=numDOF+numShared;
1257 }
1258 }
1259 } else if (i0==0 && i2==0) {
1260 // sharing top or bottom plane
1261 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1262 for (dim_t i=0; i<nDOF2; i++) {
1263 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1 : nDOF0*((i+1)*nDOF1-1));
1264 const int firstNode=(i1==-1 ? left+i*m_N0*m_N1 : left+m_N0*((i+1)*m_N1-1));
1265 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1266 sendShared.push_back(firstDOF+j);
1267 recvShared.push_back(numDOF+numShared);
1268 if (j>0) {
1269 if (i>0)
1270 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1271 colIndices[firstDOF+j-1].push_back(numShared);
1272 if (i<nDOF2-1)
1273 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1274 }
1275 if (i>0)
1276 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1277 colIndices[firstDOF+j].push_back(numShared);
1278 if (i<nDOF2-1)
1279 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1280 if (j<nDOF0-1) {
1281 if (i>0)
1282 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1283 colIndices[firstDOF+j+1].push_back(numShared);
1284 if (i<nDOF2-1)
1285 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1286 }
1287 m_dofMap[firstNode+j]=numDOF+numShared;
1288 }
1289 }
1290 } else if (i1==0 && i2==0) {
1291 // sharing left or right plane
1292 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1293 for (dim_t i=0; i<nDOF2; i++) {
1294 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1 : nDOF0*(1+i*nDOF1)-1);
1295 const int firstNode=(i0==-1 ? (bottom+i*m_N1)*m_N0 : (bottom+1+i*m_N1)*m_N0-1);
1296 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1297 sendShared.push_back(firstDOF+j*nDOF0);
1298 recvShared.push_back(numDOF+numShared);
1299 if (j>0) {
1300 if (i>0)
1301 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1302 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1303 if (i<nDOF2-1)
1304 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1305 }
1306 if (i>0)
1307 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1308 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1309 if (i<nDOF2-1)
1310 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1311 if (j<nDOF1-1) {
1312 if (i>0)
1313 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1314 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1315 if (i<nDOF2-1)
1316 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1317 }
1318 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1319 }
1320 }
1321 } else if (i0==0) {
1322 // sharing an edge
1323 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1324 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1325 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1326 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1327 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1328 // + left ?!
1329 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1330 sendShared.push_back(firstDOF+i);
1331 recvShared.push_back(numDOF+numShared);
1332 // FIXME: colIndices
1333 if (i>0) {
1334 colIndices[firstDOF+i-1].push_back(numShared);
1335 }
1336 colIndices[firstDOF+i].push_back(numShared);
1337 if (i<nDOF0-1) {
1338 colIndices[firstDOF+i+1].push_back(numShared);
1339 }
1340 m_dofMap[firstNode+i]=numDOF+numShared;
1341 }
1342
1343 } else if (i1==0) {
1344 // sharing an edge
1345 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1346 } else if (i2==0) {
1347 // sharing an edge
1348 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1349 } else {
1350 // sharing a node
1351 const int dof=(i0+1)/2*(nDOF0-1)
1352 +(i1+1)/2*nDOF0*(nDOF1-1)
1353 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1354 const int node=(i0+1)/2*(m_N0-1)
1355 +(i1+1)/2*m_N0*(m_N1-1)
1356 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1357 offsetInShared.push_back(offsetInShared.back()+1);
1358 sendShared.push_back(dof);
1359 recvShared.push_back(numDOF+numShared);
1360 colIndices[dof].push_back(numShared);
1361 m_dofMap[node]=numDOF+numShared;
1362 ++numShared;
1363 }
1364 }
1365 }
1366 }
1367 }
1368
1369 /*
1370 cout << "--- rcv_shcomp ---" << endl;
1371 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1372 for (size_t i=0; i<neighbour.size(); i++) {
1373 cout << "neighbor[" << i << "]=" << neighbour[i]
1374 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1375 }
1376 for (size_t i=0; i<recvShared.size(); i++) {
1377 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1378 }
1379 cout << "--- snd_shcomp ---" << endl;
1380 for (size_t i=0; i<sendShared.size(); i++) {
1381 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1382 }
1383 cout << "--- dofMap ---" << endl;
1384 for (size_t i=0; i<m_dofMap.size(); i++) {
1385 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1386 }
1387 cout << "--- colIndices ---" << endl;
1388 for (size_t i=0; i<colIndices.size(); i++) {
1389 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1390 }
1391 */
1392
1393 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1394 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1395 &offsetInShared[0], 1, 0, m_mpiInfo);
1396 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1397 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1398 &offsetInShared[0], 1, 0, m_mpiInfo);
1399 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1400 Paso_SharedComponents_free(snd_shcomp);
1401 Paso_SharedComponents_free(rcv_shcomp);
1402
1403 // create patterns
1404 dim_t M, N;
1405 IndexVector ptr(1,0);
1406 IndexVector index;
1407
1408 // main pattern
1409 for (index_t i=0; i<numDOF; i++) {
1410 // always add the node itself
1411 index.push_back(i);
1412 const int num=insertNeighbours(index, i);
1413 ptr.push_back(ptr.back()+num+1);
1414 }
1415 M=N=ptr.size()-1;
1416 // paso will manage the memory
1417 index_t* indexC = MEMALLOC(index.size(),index_t);
1418 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1419 copy(index.begin(), index.end(), indexC);
1420 copy(ptr.begin(), ptr.end(), ptrC);
1421 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1422 M, N, ptrC, indexC);
1423
1424 /*
1425 cout << "--- main_pattern ---" << endl;
1426 cout << "M=" << M << ", N=" << N << endl;
1427 for (size_t i=0; i<ptr.size(); i++) {
1428 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1429 }
1430 for (size_t i=0; i<index.size(); i++) {
1431 cout << "index[" << i << "]=" << index[i] << endl;
1432 }
1433 */
1434
1435 // column & row couple patterns
1436 ptr.assign(1, 0);
1437 index.clear();
1438
1439 for (index_t i=0; i<numDOF; i++) {
1440 index.insert(index.end(), colIndices[i].begin(), colIndices[i].end());
1441 ptr.push_back(ptr.back()+colIndices[i].size());
1442 }
1443
1444 // paso will manage the memory
1445 indexC = MEMALLOC(index.size(), index_t);
1446 ptrC = MEMALLOC(ptr.size(), index_t);
1447 copy(index.begin(), index.end(), indexC);
1448 copy(ptr.begin(), ptr.end(), ptrC);
1449 M=ptr.size()-1;
1450 N=numShared;
1451 Paso_Pattern *colCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1452 M, N, ptrC, indexC);
1453
1454 /*
1455 cout << "--- colCouple_pattern ---" << endl;
1456 cout << "M=" << M << ", N=" << N << endl;
1457 for (size_t i=0; i<ptr.size(); i++) {
1458 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1459 }
1460 for (size_t i=0; i<index.size(); i++) {
1461 cout << "index[" << i << "]=" << index[i] << endl;
1462 }
1463 */
1464
1465 // now build the row couple pattern
1466 IndexVector ptr2(1,0);
1467 IndexVector index2;
1468 for (dim_t id=0; id<N; id++) {
1469 dim_t n=0;
1470 for (dim_t i=0; i<M; i++) {
1471 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1472 if (index[j]==id) {
1473 index2.push_back(i);
1474 n++;
1475 break;
1476 }
1477 }
1478 }
1479 ptr2.push_back(ptr2.back()+n);
1480 }
1481
1482 // paso will manage the memory
1483 indexC = MEMALLOC(index2.size(), index_t);
1484 ptrC = MEMALLOC(ptr2.size(), index_t);
1485 copy(index2.begin(), index2.end(), indexC);
1486 copy(ptr2.begin(), ptr2.end(), ptrC);
1487 Paso_Pattern *rowCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1488 N, M, ptrC, indexC);
1489
1490 /*
1491 cout << "--- rowCouple_pattern ---" << endl;
1492 cout << "M=" << N << ", N=" << M << endl;
1493 for (size_t i=0; i<ptr2.size(); i++) {
1494 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1495 }
1496 for (size_t i=0; i<index2.size(); i++) {
1497 cout << "index[" << i << "]=" << index2[i] << endl;
1498 }
1499 */
1500
1501 // allocate paso distribution
1502 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1503 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1504
1505 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
1506 MATRIX_FORMAT_DEFAULT, distribution, distribution,
1507 mainPattern, colCouplePattern, rowCouplePattern,
1508 connector, connector);
1509 Paso_Pattern_free(mainPattern);
1510 Paso_Pattern_free(colCouplePattern);
1511 Paso_Pattern_free(rowCouplePattern);
1512 Paso_Distribution_free(distribution);
1513 return pattern;
1514 }
1515
1516 void Brick::Print_Mesh_Info(const bool full) const
1517 {
1518 RipleyDomain::Print_Mesh_Info(full);
1519 if (full) {
1520 cout << " Id Coordinates" << endl;
1521 cout.precision(15);
1522 cout.setf(ios::scientific, ios::floatfield);
1523 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1524 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1525 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1526 for (index_t i=0; i < getNumNodes(); i++) {
1527 cout << " " << setw(5) << m_nodeId[i]
1528 << " " << xdx.first+(i%m_N0)*xdx.second
1529 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1530 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1531 }
1532 }
1533 }
1534
1535 IndexVector Brick::getNumNodesPerDim() const
1536 {
1537 IndexVector ret;
1538 ret.push_back(m_N0);
1539 ret.push_back(m_N1);
1540 ret.push_back(m_N2);
1541 return ret;
1542 }
1543
1544 IndexVector Brick::getNumElementsPerDim() const
1545 {
1546 IndexVector ret;
1547 ret.push_back(m_NE0);
1548 ret.push_back(m_NE1);
1549 ret.push_back(m_NE2);
1550 return ret;
1551 }
1552
1553 IndexVector Brick::getNumFacesPerBoundary() const
1554 {
1555 IndexVector ret(6, 0);
1556 //left
1557 if (m_offset0==0)
1558 ret[0]=m_NE1*m_NE2;
1559 //right
1560 if (m_mpiInfo->rank%m_NX==m_NX-1)
1561 ret[1]=m_NE1*m_NE2;
1562 //bottom
1563 if (m_offset1==0)
1564 ret[2]=m_NE0*m_NE2;
1565 //top
1566 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1567 ret[3]=m_NE0*m_NE2;
1568 //front
1569 if (m_offset2==0)
1570 ret[4]=m_NE0*m_NE1;
1571 //back
1572 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1573 ret[5]=m_NE0*m_NE1;
1574 return ret;
1575 }
1576
1577 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1578 {
1579 if (dim==0)
1580 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1581 else if (dim==1)
1582 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1583 else if (dim==2)
1584 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1585
1586 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1587 }
1588
1589 //protected
1590 dim_t Brick::getNumDOF() const
1591 {
1592 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1593 }
1594
1595 //protected
1596 dim_t Brick::getNumFaceElements() const
1597 {
1598 const IndexVector faces = getNumFacesPerBoundary();
1599 dim_t n=0;
1600 for (size_t i=0; i<faces.size(); i++)
1601 n+=faces[i];
1602 return n;
1603 }
1604
1605 //protected
1606 void Brick::assembleCoordinates(escript::Data& arg) const
1607 {
1608 escriptDataC x = arg.getDataC();
1609 int numDim = m_numDim;
1610 if (!isDataPointShapeEqual(&x, 1, &numDim))
1611 throw RipleyException("setToX: Invalid Data object shape");
1612 if (!numSamplesEqual(&x, 1, getNumNodes()))
1613 throw RipleyException("setToX: Illegal number of samples in Data object");
1614
1615 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1616 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1617 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1618 arg.requireWrite();
1619 #pragma omp parallel for
1620 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1621 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1622 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1623 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1624 point[0] = xdx.first+i0*xdx.second;
1625 point[1] = ydy.first+i1*ydy.second;
1626 point[2] = zdz.first+i2*zdz.second;
1627 }
1628 }
1629 }
1630 }
1631
1632 //private
1633 void Brick::populateSampleIds()
1634 {
1635 // identifiers are ordered from left to right, bottom to top, front to back
1636 // globally
1637
1638 // build node distribution vector first.
1639 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1640 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1641 const dim_t numDOF=getNumDOF();
1642 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1643 m_nodeDistribution[k]=k*numDOF;
1644 }
1645 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1646 m_nodeId.resize(getNumNodes());
1647 m_dofId.resize(numDOF);
1648 m_elementId.resize(getNumElements());
1649 m_faceId.resize(getNumFaceElements());
1650
1651 #pragma omp parallel
1652 {
1653 #pragma omp for nowait
1654 // nodes
1655 for (dim_t i2=0; i2<m_N2; i2++) {
1656 for (dim_t i1=0; i1<m_N1; i1++) {
1657 for (dim_t i0=0; i0<m_N0; i0++) {
1658 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1659 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1660 +(m_offset1+i1)*(m_gNE0+1)
1661 +m_offset0+i0;
1662 }
1663 }
1664 }
1665
1666 // degrees of freedom
1667 #pragma omp for nowait
1668 for (dim_t k=0; k<numDOF; k++)
1669 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1670
1671 // elements
1672 #pragma omp for nowait
1673 for (dim_t k=0; k<getNumElements(); k++)
1674 m_elementId[k]=k;
1675
1676 // face elements
1677 #pragma omp for
1678 for (dim_t k=0; k<getNumFaceElements(); k++)
1679 m_faceId[k]=k;
1680 } // end parallel section
1681
1682 m_nodeTags.assign(getNumNodes(), 0);
1683 updateTagsInUse(Nodes);
1684
1685 m_elementTags.assign(getNumElements(), 0);
1686 updateTagsInUse(Elements);
1687
1688 // generate face offset vector and set face tags
1689 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1690 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1691 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1692 m_faceOffset.assign(facesPerEdge.size(), -1);
1693 m_faceTags.clear();
1694 index_t offset=0;
1695 for (size_t i=0; i<facesPerEdge.size(); i++) {
1696 if (facesPerEdge[i]>0) {
1697 m_faceOffset[i]=offset;
1698 offset+=facesPerEdge[i];
1699 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1700 }
1701 }
1702 setTagMap("left", LEFT);
1703 setTagMap("right", RIGHT);
1704 setTagMap("bottom", BOTTOM);
1705 setTagMap("top", TOP);
1706 setTagMap("front", FRONT);
1707 setTagMap("back", BACK);
1708 updateTagsInUse(FaceElements);
1709 }
1710
1711 //private
1712 int Brick::insertNeighbours(IndexVector& index, index_t node) const
1713 {
1714 const index_t nDOF0 = (m_gNE0+1)/m_NX;
1715 const index_t nDOF1 = (m_gNE1+1)/m_NY;
1716 const index_t nDOF2 = (m_gNE2+1)/m_NZ;
1717 const int x=node%nDOF0;
1718 const int y=node%(nDOF0*nDOF1)/nDOF0;
1719 const int z=node/(nDOF0*nDOF1);
1720 int num=0;
1721 // loop through potential neighbours and add to index if positions are
1722 // within bounds
1723 for (int i2=-1; i2<2; i2++) {
1724 for (int i1=-1; i1<2; i1++) {
1725 for (int i0=-1; i0<2; i0++) {
1726 // skip node itself
1727 if (i0==0 && i1==0 && i2==0)
1728 continue;
1729 // location of neighbour node
1730 const int nx=x+i0;
1731 const int ny=y+i1;
1732 const int nz=z+i2;
1733 if (nx>=0 && ny>=0 && nz>=0
1734 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1735 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1736 num++;
1737 }
1738 }
1739 }
1740 }
1741
1742 return num;
1743 }
1744
1745 //protected
1746 void Brick::addToSystemMatrix(Paso_SystemMatrix* mat,
1747 const IndexVector& nodes_Eq, dim_t num_Eq, const IndexVector& nodes_Sol,
1748 dim_t num_Sol, const vector<double>& array) const
1749 {
1750 const dim_t numMyCols = mat->pattern->mainPattern->numInput;
1751 const dim_t numMyRows = mat->pattern->mainPattern->numOutput;
1752
1753 const index_t* mainBlock_ptr = mat->mainBlock->pattern->ptr;
1754 const index_t* mainBlock_index = mat->mainBlock->pattern->index;
1755 double* mainBlock_val = mat->mainBlock->val;
1756 const index_t* col_coupleBlock_ptr = mat->col_coupleBlock->pattern->ptr;
1757 const index_t* col_coupleBlock_index = mat->col_coupleBlock->pattern->index;
1758 double* col_coupleBlock_val = mat->col_coupleBlock->val;
1759 const index_t* row_coupleBlock_ptr = mat->row_coupleBlock->pattern->ptr;
1760 const index_t* row_coupleBlock_index = mat->row_coupleBlock->pattern->index;
1761 double* row_coupleBlock_val = mat->row_coupleBlock->val;
1762
1763 for (dim_t k_Eq = 0; k_Eq < nodes_Eq.size(); ++k_Eq) {
1764 // down columns of array
1765 const dim_t j_Eq = nodes_Eq[k_Eq];
1766 const dim_t i_row = j_Eq;
1767 //printf("row:%d\n", i_row);
1768 // only look at the matrix rows stored on this processor
1769 if (i_row < numMyRows) {
1770 for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1771 const dim_t i_col = nodes_Sol[k_Sol];
1772 if (i_col < numMyCols) {
1773 for (dim_t k = mainBlock_ptr[i_row]; k < mainBlock_ptr[i_row + 1]; ++k) {
1774 if (mainBlock_index[k] == i_col) {
1775 mainBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1776 break;
1777 }
1778 }
1779 } else {
1780 for (dim_t k = col_coupleBlock_ptr[i_row]; k < col_coupleBlock_ptr[i_row + 1]; ++k) {
1781 //cout << "col:" << i_col-numMyCols << " colIdx:" << col_coupleBlock_index[k] << endl;
1782 if (col_coupleBlock_index[k] == i_col - numMyCols) {
1783 col_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1784 break;
1785 }
1786 }
1787 }
1788 }
1789 } else {
1790 for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1791 // across rows of array
1792 const dim_t i_col = nodes_Sol[k_Sol];
1793 if (i_col < numMyCols) {
1794 for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows];
1795 k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k)
1796 {
1797 //cout << "col:" << i_col << " rowIdx:" << row_coupleBlock_index[k] << endl;
1798 if (row_coupleBlock_index[k] == i_col) {
1799 row_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1800 break;
1801 }
1802 }
1803 }
1804 }
1805 }
1806 }
1807 }
1808
1809 //protected
1810 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1811 bool reduced) const
1812 {
1813 const dim_t numComp = in.getDataPointSize();
1814 if (reduced) {
1815 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1816 const double c0 = .125;
1817 #pragma omp parallel for
1818 for (index_t k2=0; k2 < m_NE2; ++k2) {
1819 for (index_t k1=0; k1 < m_NE1; ++k1) {
1820 for (index_t k0=0; k0 < m_NE0; ++k0) {
1821 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1822 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1823 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1824 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1825 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1826 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1827 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1828 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1829 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1830 for (index_t i=0; i < numComp; ++i) {
1831 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1832 } /* end of component loop i */
1833 } /* end of k0 loop */
1834 } /* end of k1 loop */
1835 } /* end of k2 loop */
1836 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1837 } else {
1838 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1839 const double c0 = .0094373878376559314545;
1840 const double c1 = .035220810900864519624;
1841 const double c2 = .13144585576580214704;
1842 const double c3 = .49056261216234406855;
1843 #pragma omp parallel for
1844 for (index_t k2=0; k2 < m_NE2; ++k2) {
1845 for (index_t k1=0; k1 < m_NE1; ++k1) {
1846 for (index_t k0=0; k0 < m_NE0; ++k0) {
1847 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1848 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1849 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1850 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1851 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1852 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1853 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1854 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1855 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1856 for (index_t i=0; i < numComp; ++i) {
1857 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1858 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1859 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1860 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1861 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1862 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1863 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1864 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1865 } /* end of component loop i */
1866 } /* end of k0 loop */
1867 } /* end of k1 loop */
1868 } /* end of k2 loop */
1869 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1870 }
1871 }
1872
1873 //protected
1874 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1875 bool reduced) const
1876 {
1877 const dim_t numComp = in.getDataPointSize();
1878 if (reduced) {
1879 const double c0 = .25;
1880 #pragma omp parallel
1881 {
1882 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1883 if (m_faceOffset[0] > -1) {
1884 #pragma omp for nowait
1885 for (index_t k2=0; k2 < m_NE2; ++k2) {
1886 for (index_t k1=0; k1 < m_NE1; ++k1) {
1887 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1888 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1889 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1890 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1891 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1892 for (index_t i=0; i < numComp; ++i) {
1893 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1894 } /* end of component loop i */
1895 } /* end of k1 loop */
1896 } /* end of k2 loop */
1897 } /* end of face 0 */
1898 if (m_faceOffset[1] > -1) {
1899 #pragma omp for nowait
1900 for (index_t k2=0; k2 < m_NE2; ++k2) {
1901 for (index_t k1=0; k1 < m_NE1; ++k1) {
1902 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1903 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1904 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1905 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0