/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3755 - (show annotations)
Thu Jan 5 06:51:31 2012 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 382648 byte(s)
Made element IDs globally unique so ownSample can work (to be implemented).
Fixed couple block in 3D so PDEs seem to work now with MPI :-)

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrix.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51 throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52
53 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54 throw RipleyException("Too few elements for the number of ranks");
55
56 // local number of elements (including overlap)
57 m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58 if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59 m_NE0++;
60 m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62 m_NE1++;
63 m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64 if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65 m_NE2++;
66
67 // local number of nodes
68 m_N0 = m_NE0+1;
69 m_N1 = m_NE1+1;
70 m_N2 = m_NE2+1;
71
72 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74 if (m_offset0 > 0)
75 m_offset0--;
76 m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77 if (m_offset1 > 0)
78 m_offset1--;
79 m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80 if (m_offset2 > 0)
81 m_offset2--;
82
83 populateSampleIds();
84 }
85
86
87 Brick::~Brick()
88 {
89 }
90
91 string Brick::getDescription() const
92 {
93 return "ripley::Brick";
94 }
95
96 bool Brick::operator==(const AbstractDomain& other) const
97 {
98 const Brick* o=dynamic_cast<const Brick*>(&other);
99 if (o) {
100 return (RipleyDomain::operator==(other) &&
101 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
102 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
103 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
104 }
105
106 return false;
107 }
108
109 void Brick::dump(const string& fileName) const
110 {
111 #if USE_SILO
112 string fn(fileName);
113 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
114 fn+=".silo";
115 }
116
117 const int NUM_SILO_FILES = 1;
118 const char* blockDirFmt = "/block%04d";
119 int driver=DB_HDF5;
120 string siloPath;
121 DBfile* dbfile = NULL;
122
123 #ifdef ESYS_MPI
124 PMPIO_baton_t* baton = NULL;
125 #endif
126
127 if (m_mpiInfo->size > 1) {
128 #ifdef ESYS_MPI
129 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
130 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
131 PMPIO_DefaultClose, (void*)&driver);
132 // try the fallback driver in case of error
133 if (!baton && driver != DB_PDB) {
134 driver = DB_PDB;
135 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
136 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
137 PMPIO_DefaultClose, (void*)&driver);
138 }
139 if (baton) {
140 char str[64];
141 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
142 siloPath = str;
143 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
144 }
145 #endif
146 } else {
147 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
148 getDescription().c_str(), driver);
149 // try the fallback driver in case of error
150 if (!dbfile && driver != DB_PDB) {
151 driver = DB_PDB;
152 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
153 getDescription().c_str(), driver);
154 }
155 }
156
157 if (!dbfile)
158 throw RipleyException("dump: Could not create Silo file");
159
160 /*
161 if (driver==DB_HDF5) {
162 // gzip level 1 already provides good compression with minimal
163 // performance penalty. Some tests showed that gzip levels >3 performed
164 // rather badly on escript data both in terms of time and space
165 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
166 }
167 */
168
169 boost::scoped_ptr<double> x(new double[m_N0]);
170 boost::scoped_ptr<double> y(new double[m_N1]);
171 boost::scoped_ptr<double> z(new double[m_N2]);
172 double* coords[3] = { x.get(), y.get(), z.get() };
173 pair<double,double> xdx = getFirstCoordAndSpacing(0);
174 pair<double,double> ydy = getFirstCoordAndSpacing(1);
175 pair<double,double> zdz = getFirstCoordAndSpacing(2);
176 #pragma omp parallel
177 {
178 #pragma omp for
179 for (dim_t i0 = 0; i0 < m_N0; i0++) {
180 coords[0][i0]=xdx.first+i0*xdx.second;
181 }
182 #pragma omp for
183 for (dim_t i1 = 0; i1 < m_N1; i1++) {
184 coords[1][i1]=ydy.first+i1*ydy.second;
185 }
186 #pragma omp for
187 for (dim_t i2 = 0; i2 < m_N2; i2++) {
188 coords[2][i2]=zdz.first+i2*zdz.second;
189 }
190 }
191 IndexVector dims = getNumNodesPerDim();
192 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
193 DB_COLLINEAR, NULL);
194
195 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
196 NULL, 0, DB_INT, DB_NODECENT, NULL);
197
198 // write element ids
199 dims = getNumElementsPerDim();
200 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
201 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
202
203 // rank 0 writes multimesh and multivar
204 if (m_mpiInfo->rank == 0) {
205 vector<string> tempstrings;
206 vector<char*> names;
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
214 DBSetDir(dbfile, "/");
215 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
216 &types[0], NULL);
217 tempstrings.clear();
218 names.clear();
219 for (dim_t i=0; i<m_mpiInfo->size; i++) {
220 stringstream path;
221 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
222 tempstrings.push_back(path.str());
223 names.push_back((char*)tempstrings.back().c_str());
224 }
225 types.assign(m_mpiInfo->size, DB_QUADVAR);
226 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
227 &types[0], NULL);
228 tempstrings.clear();
229 names.clear();
230 for (dim_t i=0; i<m_mpiInfo->size; i++) {
231 stringstream path;
232 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
233 tempstrings.push_back(path.str());
234 names.push_back((char*)tempstrings.back().c_str());
235 }
236 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
237 &types[0], NULL);
238 }
239
240 if (m_mpiInfo->size > 1) {
241 #ifdef ESYS_MPI
242 PMPIO_HandOffBaton(baton, dbfile);
243 PMPIO_Finish(baton);
244 #endif
245 } else {
246 DBClose(dbfile);
247 }
248
249 #else // USE_SILO
250 throw RipleyException("dump(): no Silo support");
251 #endif
252 }
253
254 const int* Brick::borrowSampleReferenceIDs(int fsType) const
255 {
256 switch (fsType) {
257 case Nodes:
258 case ReducedNodes: //FIXME: reduced
259 return &m_nodeId[0];
260 case DegreesOfFreedom: //FIXME
261 case ReducedDegreesOfFreedom: //FIXME
262 return &m_dofId[0];
263 case Elements:
264 case ReducedElements:
265 return &m_elementId[0];
266 case ReducedFaceElements:
267 return &m_faceId[0];
268 default:
269 break;
270 }
271
272 stringstream msg;
273 msg << "borrowSampleReferenceIDs() not implemented for function space type "
274 << fsType;
275 throw RipleyException(msg.str());
276 }
277
278 bool Brick::ownSample(int fsCode, index_t id) const
279 {
280 #ifdef ESYS_MPI
281 if (fsCode == Nodes) {
282 const index_t left = (m_offset0==0 ? 0 : 1);
283 const index_t bottom = (m_offset1==0 ? 0 : 1);
284 const index_t front = (m_offset2==0 ? 0 : 1);
285 const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
286 const index_t top = (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1 ? m_N1 : m_N1-1);
287 const index_t back = (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1 ? m_N2 : m_N2-1);
288 const index_t x=id%m_N0;
289 const index_t y=id%(m_N0*m_N1)/m_N0;
290 const index_t z=id/(m_N0*m_N1);
291 return (x>=left && x<right && y>=bottom && y<top && z>=front && z<back);
292
293 } else {
294 stringstream msg;
295 msg << "ownSample() not implemented for "
296 << functionSpaceTypeAsString(fsCode);
297 throw RipleyException(msg.str());
298 }
299 #else
300 return true;
301 #endif
302 }
303
304 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
305 {
306 escript::Data& in = *const_cast<escript::Data*>(&cIn);
307 const dim_t numComp = in.getDataPointSize();
308 const double h0 = m_l0/m_gNE0;
309 const double h1 = m_l1/m_gNE1;
310 const double h2 = m_l1/m_gNE2;
311 const double C0 = .044658198738520451079;
312 const double C1 = .16666666666666666667;
313 const double C2 = .21132486540518711775;
314 const double C3 = .25;
315 const double C4 = .5;
316 const double C5 = .62200846792814621559;
317 const double C6 = .78867513459481288225;
318
319 if (out.getFunctionSpace().getTypeCode() == Elements) {
320 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
321 #pragma omp parallel for
322 for (index_t k2=0; k2 < m_NE2; ++k2) {
323 for (index_t k1=0; k1 < m_NE1; ++k1) {
324 for (index_t k0=0; k0 < m_NE0; ++k0) {
325 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
326 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
327 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
328 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
329 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
330 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
331 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
332 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
333 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
334 for (index_t i=0; i < numComp; ++i) {
335 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
336 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
337 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
338 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
339 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
340 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
341 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
342 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
343 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
344 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
345 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
346 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
347 o[INDEX3(i,0,0,numComp,3)] = V0;
348 o[INDEX3(i,1,0,numComp,3)] = V4;
349 o[INDEX3(i,2,0,numComp,3)] = V8;
350 o[INDEX3(i,0,1,numComp,3)] = V0;
351 o[INDEX3(i,1,1,numComp,3)] = V5;
352 o[INDEX3(i,2,1,numComp,3)] = V9;
353 o[INDEX3(i,0,2,numComp,3)] = V1;
354 o[INDEX3(i,1,2,numComp,3)] = V4;
355 o[INDEX3(i,2,2,numComp,3)] = V10;
356 o[INDEX3(i,0,3,numComp,3)] = V1;
357 o[INDEX3(i,1,3,numComp,3)] = V5;
358 o[INDEX3(i,2,3,numComp,3)] = V11;
359 o[INDEX3(i,0,4,numComp,3)] = V2;
360 o[INDEX3(i,1,4,numComp,3)] = V6;
361 o[INDEX3(i,2,4,numComp,3)] = V8;
362 o[INDEX3(i,0,5,numComp,3)] = V2;
363 o[INDEX3(i,1,5,numComp,3)] = V7;
364 o[INDEX3(i,2,5,numComp,3)] = V9;
365 o[INDEX3(i,0,6,numComp,3)] = V3;
366 o[INDEX3(i,1,6,numComp,3)] = V6;
367 o[INDEX3(i,2,6,numComp,3)] = V10;
368 o[INDEX3(i,0,7,numComp,3)] = V3;
369 o[INDEX3(i,1,7,numComp,3)] = V7;
370 o[INDEX3(i,2,7,numComp,3)] = V11;
371 } /* end of component loop i */
372 } /* end of k0 loop */
373 } /* end of k1 loop */
374 } /* end of k2 loop */
375 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378 #pragma omp parallel for
379 for (index_t k2=0; k2 < m_NE2; ++k2) {
380 for (index_t k1=0; k1 < m_NE1; ++k1) {
381 for (index_t k0=0; k0 < m_NE0; ++k0) {
382 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
383 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
384 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
385 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
386 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
387 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
388 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
389 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
390 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
391 for (index_t i=0; i < numComp; ++i) {
392 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
393 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
394 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
395 } /* end of component loop i */
396 } /* end of k0 loop */
397 } /* end of k1 loop */
398 } /* end of k2 loop */
399 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
400 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
401 /*** GENERATOR SNIP_GRAD_FACES TOP */
402 #pragma omp parallel
403 {
404 if (m_faceOffset[0] > -1) {
405 #pragma omp for nowait
406 for (index_t k2=0; k2 < m_NE2; ++k2) {
407 for (index_t k1=0; k1 < m_NE1; ++k1) {
408 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
409 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
410 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
411 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
412 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
413 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
414 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
415 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
416 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
417 for (index_t i=0; i < numComp; ++i) {
418 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
419 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
420 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
421 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
422 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423 o[INDEX3(i,1,0,numComp,3)] = V0;
424 o[INDEX3(i,2,0,numComp,3)] = V2;
425 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426 o[INDEX3(i,1,1,numComp,3)] = V0;
427 o[INDEX3(i,2,1,numComp,3)] = V3;
428 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429 o[INDEX3(i,1,2,numComp,3)] = V1;
430 o[INDEX3(i,2,2,numComp,3)] = V2;
431 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
432 o[INDEX3(i,1,3,numComp,3)] = V1;
433 o[INDEX3(i,2,3,numComp,3)] = V3;
434 } /* end of component loop i */
435 } /* end of k1 loop */
436 } /* end of k2 loop */
437 } /* end of face 0 */
438 if (m_faceOffset[1] > -1) {
439 #pragma omp for nowait
440 for (index_t k2=0; k2 < m_NE2; ++k2) {
441 for (index_t k1=0; k1 < m_NE1; ++k1) {
442 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
443 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
444 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
445 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
446 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
447 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
448 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
449 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
450 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
451 for (index_t i=0; i < numComp; ++i) {
452 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
453 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
454 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
455 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
456 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457 o[INDEX3(i,1,0,numComp,3)] = V0;
458 o[INDEX3(i,2,0,numComp,3)] = V2;
459 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460 o[INDEX3(i,1,1,numComp,3)] = V0;
461 o[INDEX3(i,2,1,numComp,3)] = V3;
462 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463 o[INDEX3(i,1,2,numComp,3)] = V1;
464 o[INDEX3(i,2,2,numComp,3)] = V2;
465 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466 o[INDEX3(i,1,3,numComp,3)] = V1;
467 o[INDEX3(i,2,3,numComp,3)] = V3;
468 } /* end of component loop i */
469 } /* end of k1 loop */
470 } /* end of k2 loop */
471 } /* end of face 1 */
472 if (m_faceOffset[2] > -1) {
473 #pragma omp for nowait
474 for (index_t k2=0; k2 < m_NE2; ++k2) {
475 for (index_t k0=0; k0 < m_NE0; ++k0) {
476 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
477 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
478 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
479 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
480 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
481 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
482 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
483 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
484 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
485 for (index_t i=0; i < numComp; ++i) {
486 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
487 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
488 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
489 o[INDEX3(i,0,0,numComp,3)] = V0;
490 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491 o[INDEX3(i,2,0,numComp,3)] = V1;
492 o[INDEX3(i,0,1,numComp,3)] = V0;
493 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494 o[INDEX3(i,2,1,numComp,3)] = V2;
495 o[INDEX3(i,0,2,numComp,3)] = V0;
496 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497 o[INDEX3(i,2,2,numComp,3)] = V1;
498 o[INDEX3(i,0,3,numComp,3)] = V0;
499 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
500 o[INDEX3(i,2,3,numComp,3)] = V2;
501 } /* end of component loop i */
502 } /* end of k0 loop */
503 } /* end of k2 loop */
504 } /* end of face 2 */
505 if (m_faceOffset[3] > -1) {
506 #pragma omp for nowait
507 for (index_t k2=0; k2 < m_NE2; ++k2) {
508 for (index_t k0=0; k0 < m_NE0; ++k0) {
509 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
510 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
511 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
512 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
513 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
514 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
515 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
516 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
517 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
518 for (index_t i=0; i < numComp; ++i) {
519 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
520 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
521 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
522 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
523 o[INDEX3(i,0,0,numComp,3)] = V0;
524 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525 o[INDEX3(i,2,0,numComp,3)] = V2;
526 o[INDEX3(i,0,1,numComp,3)] = V0;
527 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528 o[INDEX3(i,2,1,numComp,3)] = V3;
529 o[INDEX3(i,0,2,numComp,3)] = V1;
530 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531 o[INDEX3(i,2,2,numComp,3)] = V2;
532 o[INDEX3(i,0,3,numComp,3)] = V1;
533 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534 o[INDEX3(i,2,3,numComp,3)] = V3;
535 } /* end of component loop i */
536 } /* end of k0 loop */
537 } /* end of k2 loop */
538 } /* end of face 3 */
539 if (m_faceOffset[4] > -1) {
540 #pragma omp for nowait
541 for (index_t k1=0; k1 < m_NE1; ++k1) {
542 for (index_t k0=0; k0 < m_NE0; ++k0) {
543 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
544 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
545 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
546 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
547 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
548 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
549 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
550 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
551 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
552 for (index_t i=0; i < numComp; ++i) {
553 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
554 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
555 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
556 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
557 o[INDEX3(i,0,0,numComp,3)] = V0;
558 o[INDEX3(i,1,0,numComp,3)] = V2;
559 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 o[INDEX3(i,0,1,numComp,3)] = V0;
561 o[INDEX3(i,1,1,numComp,3)] = V3;
562 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563 o[INDEX3(i,0,2,numComp,3)] = V1;
564 o[INDEX3(i,1,2,numComp,3)] = V2;
565 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566 o[INDEX3(i,0,3,numComp,3)] = V1;
567 o[INDEX3(i,1,3,numComp,3)] = V3;
568 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
569 } /* end of component loop i */
570 } /* end of k0 loop */
571 } /* end of k1 loop */
572 } /* end of face 4 */
573 if (m_faceOffset[5] > -1) {
574 #pragma omp for nowait
575 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 for (index_t k0=0; k0 < m_NE0; ++k0) {
577 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
578 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
579 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
580 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
581 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
582 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
583 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
584 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
585 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
586 for (index_t i=0; i < numComp; ++i) {
587 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
588 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
589 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
590 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
591 o[INDEX3(i,0,0,numComp,3)] = V0;
592 o[INDEX3(i,1,0,numComp,3)] = V2;
593 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594 o[INDEX3(i,0,1,numComp,3)] = V0;
595 o[INDEX3(i,1,1,numComp,3)] = V3;
596 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597 o[INDEX3(i,0,2,numComp,3)] = V1;
598 o[INDEX3(i,1,2,numComp,3)] = V2;
599 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600 o[INDEX3(i,0,3,numComp,3)] = V1;
601 o[INDEX3(i,1,3,numComp,3)] = V3;
602 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 } /* end of component loop i */
604 } /* end of k0 loop */
605 } /* end of k1 loop */
606 } /* end of face 5 */
607 } // end of parallel section
608 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
609 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
610 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
611 #pragma omp parallel
612 {
613 if (m_faceOffset[0] > -1) {
614 #pragma omp for nowait
615 for (index_t k2=0; k2 < m_NE2; ++k2) {
616 for (index_t k1=0; k1 < m_NE1; ++k1) {
617 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
618 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
619 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
620 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
621 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
622 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
623 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
624 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
625 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
626 for (index_t i=0; i < numComp; ++i) {
627 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
628 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
629 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
630 } /* end of component loop i */
631 } /* end of k1 loop */
632 } /* end of k2 loop */
633 } /* end of face 0 */
634 if (m_faceOffset[1] > -1) {
635 #pragma omp for nowait
636 for (index_t k2=0; k2 < m_NE2; ++k2) {
637 for (index_t k1=0; k1 < m_NE1; ++k1) {
638 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
639 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
640 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
641 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
642 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
643 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
644 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
645 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
646 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
647 for (index_t i=0; i < numComp; ++i) {
648 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
649 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
650 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
651 } /* end of component loop i */
652 } /* end of k1 loop */
653 } /* end of k2 loop */
654 } /* end of face 1 */
655 if (m_faceOffset[2] > -1) {
656 #pragma omp for nowait
657 for (index_t k2=0; k2 < m_NE2; ++k2) {
658 for (index_t k0=0; k0 < m_NE0; ++k0) {
659 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
660 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
661 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
662 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
663 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
664 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
665 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
666 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
667 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
668 for (index_t i=0; i < numComp; ++i) {
669 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
670 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
671 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
672 } /* end of component loop i */
673 } /* end of k0 loop */
674 } /* end of k2 loop */
675 } /* end of face 2 */
676 if (m_faceOffset[3] > -1) {
677 #pragma omp for nowait
678 for (index_t k2=0; k2 < m_NE2; ++k2) {
679 for (index_t k0=0; k0 < m_NE0; ++k0) {
680 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
681 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
682 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
683 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
684 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
685 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
686 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
687 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
688 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
689 for (index_t i=0; i < numComp; ++i) {
690 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
691 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
692 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
693 } /* end of component loop i */
694 } /* end of k0 loop */
695 } /* end of k2 loop */
696 } /* end of face 3 */
697 if (m_faceOffset[4] > -1) {
698 #pragma omp for nowait
699 for (index_t k1=0; k1 < m_NE1; ++k1) {
700 for (index_t k0=0; k0 < m_NE0; ++k0) {
701 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
702 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
703 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
704 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
705 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
706 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
707 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
708 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
709 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
710 for (index_t i=0; i < numComp; ++i) {
711 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
712 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
713 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
714 } /* end of component loop i */
715 } /* end of k0 loop */
716 } /* end of k1 loop */
717 } /* end of face 4 */
718 if (m_faceOffset[5] > -1) {
719 #pragma omp for nowait
720 for (index_t k1=0; k1 < m_NE1; ++k1) {
721 for (index_t k0=0; k0 < m_NE0; ++k0) {
722 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
723 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
724 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
725 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
726 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
727 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
728 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
729 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
730 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
731 for (index_t i=0; i < numComp; ++i) {
732 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
733 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
734 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
735 } /* end of component loop i */
736 } /* end of k0 loop */
737 } /* end of k1 loop */
738 } /* end of face 5 */
739 } // end of parallel section
740 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
741 } else {
742 stringstream msg;
743 msg << "setToGradient() not implemented for "
744 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
745 throw RipleyException(msg.str());
746 }
747 }
748
749 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
750 {
751 escript::Data& in = *const_cast<escript::Data*>(&arg);
752 const dim_t numComp = in.getDataPointSize();
753 const double h0 = m_l0/m_gNE0;
754 const double h1 = m_l1/m_gNE1;
755 const double h2 = m_l2/m_gNE2;
756 if (arg.getFunctionSpace().getTypeCode() == Elements) {
757 const double w_0 = h0*h1*h2/8.;
758 #pragma omp parallel
759 {
760 vector<double> int_local(numComp, 0);
761 #pragma omp for nowait
762 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
763 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
764 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
765 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
766 for (index_t i=0; i < numComp; ++i) {
767 const register double f_0 = f[INDEX2(i,0,numComp)];
768 const register double f_1 = f[INDEX2(i,1,numComp)];
769 const register double f_2 = f[INDEX2(i,2,numComp)];
770 const register double f_3 = f[INDEX2(i,3,numComp)];
771 const register double f_4 = f[INDEX2(i,4,numComp)];
772 const register double f_5 = f[INDEX2(i,5,numComp)];
773 const register double f_6 = f[INDEX2(i,6,numComp)];
774 const register double f_7 = f[INDEX2(i,7,numComp)];
775 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
776 } /* end of component loop i */
777 } /* end of k0 loop */
778 } /* end of k1 loop */
779 } /* end of k2 loop */
780
781 #pragma omp critical
782 for (index_t i=0; i<numComp; i++)
783 integrals[i]+=int_local[i];
784 } // end of parallel section
785 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
786 const double w_0 = h0*h1*h2;
787 #pragma omp parallel
788 {
789 vector<double> int_local(numComp, 0);
790 #pragma omp for nowait
791 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
792 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
793 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
794 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
795 for (index_t i=0; i < numComp; ++i) {
796 int_local[i]+=f[i]*w_0;
797 } /* end of component loop i */
798 } /* end of k0 loop */
799 } /* end of k1 loop */
800 } /* end of k2 loop */
801
802 #pragma omp critical
803 for (index_t i=0; i<numComp; i++)
804 integrals[i]+=int_local[i];
805 } // end of parallel section
806 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
807 const double w_0 = h1*h2/4.;
808 const double w_1 = h0*h2/4.;
809 const double w_2 = h0*h1/4.;
810 #pragma omp parallel
811 {
812 vector<double> int_local(numComp, 0);
813 if (m_faceOffset[0] > -1) {
814 #pragma omp for nowait
815 for (index_t k2=0; k2 < m_NE2; ++k2) {
816 for (index_t k1=0; k1 < m_NE1; ++k1) {
817 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
818 for (index_t i=0; i < numComp; ++i) {
819 const register double f_0 = f[INDEX2(i,0,numComp)];
820 const register double f_1 = f[INDEX2(i,1,numComp)];
821 const register double f_2 = f[INDEX2(i,2,numComp)];
822 const register double f_3 = f[INDEX2(i,3,numComp)];
823 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
824 } /* end of component loop i */
825 } /* end of k1 loop */
826 } /* end of k2 loop */
827 }
828
829 if (m_faceOffset[1] > -1) {
830 #pragma omp for nowait
831 for (index_t k2=0; k2 < m_NE2; ++k2) {
832 for (index_t k1=0; k1 < m_NE1; ++k1) {
833 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
834 for (index_t i=0; i < numComp; ++i) {
835 const register double f_0 = f[INDEX2(i,0,numComp)];
836 const register double f_1 = f[INDEX2(i,1,numComp)];
837 const register double f_2 = f[INDEX2(i,2,numComp)];
838 const register double f_3 = f[INDEX2(i,3,numComp)];
839 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
840 } /* end of component loop i */
841 } /* end of k1 loop */
842 } /* end of k2 loop */
843 }
844
845 if (m_faceOffset[2] > -1) {
846 #pragma omp for nowait
847 for (index_t k2=0; k2 < m_NE2; ++k2) {
848 for (index_t k0=0; k0 < m_NE0; ++k0) {
849 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
850 for (index_t i=0; i < numComp; ++i) {
851 const register double f_0 = f[INDEX2(i,0,numComp)];
852 const register double f_1 = f[INDEX2(i,1,numComp)];
853 const register double f_2 = f[INDEX2(i,2,numComp)];
854 const register double f_3 = f[INDEX2(i,3,numComp)];
855 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
856 } /* end of component loop i */
857 } /* end of k1 loop */
858 } /* end of k2 loop */
859 }
860
861 if (m_faceOffset[3] > -1) {
862 #pragma omp for nowait
863 for (index_t k2=0; k2 < m_NE2; ++k2) {
864 for (index_t k0=0; k0 < m_NE0; ++k0) {
865 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
866 for (index_t i=0; i < numComp; ++i) {
867 const register double f_0 = f[INDEX2(i,0,numComp)];
868 const register double f_1 = f[INDEX2(i,1,numComp)];
869 const register double f_2 = f[INDEX2(i,2,numComp)];
870 const register double f_3 = f[INDEX2(i,3,numComp)];
871 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
872 } /* end of component loop i */
873 } /* end of k1 loop */
874 } /* end of k2 loop */
875 }
876
877 if (m_faceOffset[4] > -1) {
878 #pragma omp for nowait
879 for (index_t k1=0; k1 < m_NE1; ++k1) {
880 for (index_t k0=0; k0 < m_NE0; ++k0) {
881 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
882 for (index_t i=0; i < numComp; ++i) {
883 const register double f_0 = f[INDEX2(i,0,numComp)];
884 const register double f_1 = f[INDEX2(i,1,numComp)];
885 const register double f_2 = f[INDEX2(i,2,numComp)];
886 const register double f_3 = f[INDEX2(i,3,numComp)];
887 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
888 } /* end of component loop i */
889 } /* end of k1 loop */
890 } /* end of k2 loop */
891 }
892
893 if (m_faceOffset[5] > -1) {
894 #pragma omp for nowait
895 for (index_t k1=0; k1 < m_NE1; ++k1) {
896 for (index_t k0=0; k0 < m_NE0; ++k0) {
897 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
898 for (index_t i=0; i < numComp; ++i) {
899 const register double f_0 = f[INDEX2(i,0,numComp)];
900 const register double f_1 = f[INDEX2(i,1,numComp)];
901 const register double f_2 = f[INDEX2(i,2,numComp)];
902 const register double f_3 = f[INDEX2(i,3,numComp)];
903 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
904 } /* end of component loop i */
905 } /* end of k1 loop */
906 } /* end of k2 loop */
907 }
908
909 #pragma omp critical
910 for (index_t i=0; i<numComp; i++)
911 integrals[i]+=int_local[i];
912 } // end of parallel section
913
914 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
915 const double w_0 = h1*h2;
916 const double w_1 = h0*h2;
917 const double w_2 = h0*h1;
918 #pragma omp parallel
919 {
920 vector<double> int_local(numComp, 0);
921 if (m_faceOffset[0] > -1) {
922 #pragma omp for nowait
923 for (index_t k2=0; k2 < m_NE2; ++k2) {
924 for (index_t k1=0; k1 < m_NE1; ++k1) {
925 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
926 for (index_t i=0; i < numComp; ++i) {
927 int_local[i]+=f[i]*w_0;
928 } /* end of component loop i */
929 } /* end of k1 loop */
930 } /* end of k2 loop */
931 }
932
933 if (m_faceOffset[1] > -1) {
934 #pragma omp for nowait
935 for (index_t k2=0; k2 < m_NE2; ++k2) {
936 for (index_t k1=0; k1 < m_NE1; ++k1) {
937 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
938 for (index_t i=0; i < numComp; ++i) {
939 int_local[i]+=f[i]*w_0;
940 } /* end of component loop i */
941 } /* end of k1 loop */
942 } /* end of k2 loop */
943 }
944
945 if (m_faceOffset[2] > -1) {
946 #pragma omp for nowait
947 for (index_t k2=0; k2 < m_NE2; ++k2) {
948 for (index_t k0=0; k0 < m_NE0; ++k0) {
949 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
950 for (index_t i=0; i < numComp; ++i) {
951 int_local[i]+=f[i]*w_1;
952 } /* end of component loop i */
953 } /* end of k1 loop */
954 } /* end of k2 loop */
955 }
956
957 if (m_faceOffset[3] > -1) {
958 #pragma omp for nowait
959 for (index_t k2=0; k2 < m_NE2; ++k2) {
960 for (index_t k0=0; k0 < m_NE0; ++k0) {
961 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
962 for (index_t i=0; i < numComp; ++i) {
963 int_local[i]+=f[i]*w_1;
964 } /* end of component loop i */
965 } /* end of k1 loop */
966 } /* end of k2 loop */
967 }
968
969 if (m_faceOffset[4] > -1) {
970 #pragma omp for nowait
971 for (index_t k1=0; k1 < m_NE1; ++k1) {
972 for (index_t k0=0; k0 < m_NE0; ++k0) {
973 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
974 for (index_t i=0; i < numComp; ++i) {
975 int_local[i]+=f[i]*w_2;
976 } /* end of component loop i */
977 } /* end of k1 loop */
978 } /* end of k2 loop */
979 }
980
981 if (m_faceOffset[5] > -1) {
982 #pragma omp for nowait
983 for (index_t k1=0; k1 < m_NE1; ++k1) {
984 for (index_t k0=0; k0 < m_NE0; ++k0) {
985 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
986 for (index_t i=0; i < numComp; ++i) {
987 int_local[i]+=f[i]*w_2;
988 } /* end of component loop i */
989 } /* end of k1 loop */
990 } /* end of k2 loop */
991 }
992
993 #pragma omp critical
994 for (index_t i=0; i<numComp; i++)
995 integrals[i]+=int_local[i];
996 } // end of parallel section
997
998 } else {
999 stringstream msg;
1000 msg << "setToIntegrals() not implemented for "
1001 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1002 throw RipleyException(msg.str());
1003 }
1004 }
1005
1006 void Brick::setToNormal(escript::Data& out) const
1007 {
1008 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1009 #pragma omp parallel
1010 {
1011 if (m_faceOffset[0] > -1) {
1012 #pragma omp for nowait
1013 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1014 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1015 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1016 // set vector at four quadrature points
1017 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1018 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1019 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1020 *o++ = -1.; *o++ = 0.; *o = 0.;
1021 }
1022 }
1023 }
1024
1025 if (m_faceOffset[1] > -1) {
1026 #pragma omp for nowait
1027 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1028 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1029 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1030 // set vector at four quadrature points
1031 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1032 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1033 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1034 *o++ = 1.; *o++ = 0.; *o = 0.;
1035 }
1036 }
1037 }
1038
1039 if (m_faceOffset[2] > -1) {
1040 #pragma omp for nowait
1041 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1042 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1043 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1044 // set vector at four quadrature points
1045 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1046 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1047 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1048 *o++ = 0.; *o++ = -1.; *o = 0.;
1049 }
1050 }
1051 }
1052
1053 if (m_faceOffset[3] > -1) {
1054 #pragma omp for nowait
1055 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1056 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1057 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1058 // set vector at four quadrature points
1059 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1060 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1061 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1062 *o++ = 0.; *o++ = 1.; *o = 0.;
1063 }
1064 }
1065 }
1066
1067 if (m_faceOffset[4] > -1) {
1068 #pragma omp for nowait
1069 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1070 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1071 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1072 // set vector at four quadrature points
1073 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1074 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1075 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1076 *o++ = 0.; *o++ = 0.; *o = -1.;
1077 }
1078 }
1079 }
1080
1081 if (m_faceOffset[5] > -1) {
1082 #pragma omp for nowait
1083 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1084 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1085 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1086 // set vector at four quadrature points
1087 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1088 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1089 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1090 *o++ = 0.; *o++ = 0.; *o = 1.;
1091 }
1092 }
1093 }
1094 } // end of parallel section
1095 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1096 #pragma omp parallel
1097 {
1098 if (m_faceOffset[0] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1101 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1102 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1103 *o++ = -1.;
1104 *o++ = 0.;
1105 *o = 0.;
1106 }
1107 }
1108 }
1109
1110 if (m_faceOffset[1] > -1) {
1111 #pragma omp for nowait
1112 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1113 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1114 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1115 *o++ = 1.;
1116 *o++ = 0.;
1117 *o = 0.;
1118 }
1119 }
1120 }
1121
1122 if (m_faceOffset[2] > -1) {
1123 #pragma omp for nowait
1124 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1125 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1126 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1127 *o++ = 0.;
1128 *o++ = -1.;
1129 *o = 0.;
1130 }
1131 }
1132 }
1133
1134 if (m_faceOffset[3] > -1) {
1135 #pragma omp for nowait
1136 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1137 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1138 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1139 *o++ = 0.;
1140 *o++ = 1.;
1141 *o = 0.;
1142 }
1143 }
1144 }
1145
1146 if (m_faceOffset[4] > -1) {
1147 #pragma omp for nowait
1148 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1149 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1150 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1151 *o++ = 0.;
1152 *o++ = 0.;
1153 *o = -1.;
1154 }
1155 }
1156 }
1157
1158 if (m_faceOffset[5] > -1) {
1159 #pragma omp for nowait
1160 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1161 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1162 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1163 *o++ = 0.;
1164 *o++ = 0.;
1165 *o = 1.;
1166 }
1167 }
1168 }
1169 } // end of parallel section
1170
1171 } else {
1172 stringstream msg;
1173 msg << "setToNormal() not implemented for "
1174 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1175 throw RipleyException(msg.str());
1176 }
1177 }
1178
1179 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1180 bool reducedColOrder) const
1181 {
1182 if (reducedRowOrder || reducedColOrder)
1183 throw RipleyException("getPattern() not implemented for reduced order");
1184
1185 // connector
1186 RankVector neighbour;
1187 IndexVector offsetInShared(1,0);
1188 IndexVector sendShared, recvShared;
1189 const IndexVector faces=getNumFacesPerBoundary();
1190 const index_t nDOF0 = (m_gNE0+1)/m_NX;
1191 const index_t nDOF1 = (m_gNE1+1)/m_NY;
1192 const index_t nDOF2 = (m_gNE2+1)/m_NZ;
1193 const int numDOF=nDOF0*nDOF1*nDOF2;
1194 const index_t left = (m_offset0==0 ? 0 : 1);
1195 const index_t bottom = (m_offset1==0 ? 0 : 1);
1196 const index_t front = (m_offset2==0 ? 0 : 1);
1197 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1198 int numShared=0;
1199
1200 m_dofMap.assign(getNumNodes(), 0);
1201 #pragma omp parallel for
1202 for (index_t i=front; i<m_N2; i++) {
1203 for (index_t j=bottom; j<m_N1; j++) {
1204 for (index_t k=left; k<m_N0; k++) {
1205 m_dofMap[i*m_N0*m_N1+j*m_N0+k]=(i-front)*nDOF0*nDOF1+(j-bottom)*nDOF0+k-left;
1206 }
1207 }
1208 }
1209
1210 // build list of shared components and neighbours by looping through
1211 // all potential neighbouring ranks and checking if positions are
1212 // within bounds
1213 const int x=m_mpiInfo->rank%m_NX;
1214 const int y=m_mpiInfo->rank%(m_NX*m_NY)/m_NX;
1215 const int z=m_mpiInfo->rank/(m_NX*m_NY);
1216 for (int i2=-1; i2<2; i2++) {
1217 for (int i1=-1; i1<2; i1++) {
1218 for (int i0=-1; i0<2; i0++) {
1219 // skip rank itself
1220 if (i0==0 && i1==0 && i2==0)
1221 continue;
1222 // location of neighbour rank
1223 const int nx=x+i0;
1224 const int ny=y+i1;
1225 const int nz=z+i2;
1226 if (nx>=0 && ny>=0 && nz>=0 && nx<m_NX && ny<m_NY && nz<m_NZ) {
1227 neighbour.push_back(nz*m_NX*m_NY+ny*m_NX+nx);
1228 if (i0==0 && i1==0) {
1229 // sharing front or back plane
1230 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF1);
1231 for (dim_t i=0; i<nDOF1; i++) {
1232 const int firstDOF=(i2==-1 ? i*nDOF0
1233 : i*nDOF0 + nDOF0*nDOF1*(nDOF2-1));
1234 const int firstNode=(i2==-1 ? left+(i+bottom)*m_N0
1235 : left+(i+bottom)*m_N0+m_N0*m_N1*(m_N2-1));
1236 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1237 sendShared.push_back(firstDOF+j);
1238 recvShared.push_back(numDOF+numShared);
1239 if (j>0) {
1240 if (i>0)
1241 colIndices[firstDOF+j-1-nDOF0].push_back(numShared);
1242 colIndices[firstDOF+j-1].push_back(numShared);
1243 if (i<nDOF1-1)
1244 colIndices[firstDOF+j-1+nDOF0].push_back(numShared);
1245 }
1246 if (i>0)
1247 colIndices[firstDOF+j-nDOF0].push_back(numShared);
1248 colIndices[firstDOF+j].push_back(numShared);
1249 if (i<nDOF1-1)
1250 colIndices[firstDOF+j+nDOF0].push_back(numShared);
1251 if (j<nDOF0-1) {
1252 if (i>0)
1253 colIndices[firstDOF+j+1-nDOF0].push_back(numShared);
1254 colIndices[firstDOF+j+1].push_back(numShared);
1255 if (i<nDOF1-1)
1256 colIndices[firstDOF+j+1+nDOF0].push_back(numShared);
1257 }
1258 m_dofMap[firstNode+j]=numDOF+numShared;
1259 }
1260 }
1261 } else if (i0==0 && i2==0) {
1262 // sharing top or bottom plane
1263 offsetInShared.push_back(offsetInShared.back()+nDOF0*nDOF2);
1264 for (dim_t i=0; i<nDOF2; i++) {
1265 const int firstDOF=(i1==-1 ? i*nDOF0*nDOF1
1266 : nDOF0*((i+1)*nDOF1-1));
1267 const int firstNode=(i1==-1 ?
1268 left+(i+front)*m_N0*m_N1
1269 : left+m_N0*((i+1+front)*m_N1-1));
1270 for (dim_t j=0; j<nDOF0; j++, numShared++) {
1271 sendShared.push_back(firstDOF+j);
1272 recvShared.push_back(numDOF+numShared);
1273 if (j>0) {
1274 if (i>0)
1275 colIndices[firstDOF+j-1-nDOF0*nDOF1].push_back(numShared);
1276 colIndices[firstDOF+j-1].push_back(numShared);
1277 if (i<nDOF2-1)
1278 colIndices[firstDOF+j-1+nDOF0*nDOF1].push_back(numShared);
1279 }
1280 if (i>0)
1281 colIndices[firstDOF+j-nDOF0*nDOF1].push_back(numShared);
1282 colIndices[firstDOF+j].push_back(numShared);
1283 if (i<nDOF2-1)
1284 colIndices[firstDOF+j+nDOF0*nDOF1].push_back(numShared);
1285 if (j<nDOF0-1) {
1286 if (i>0)
1287 colIndices[firstDOF+j+1-nDOF0*nDOF1].push_back(numShared);
1288 colIndices[firstDOF+j+1].push_back(numShared);
1289 if (i<nDOF2-1)
1290 colIndices[firstDOF+j+1+nDOF0*nDOF1].push_back(numShared);
1291 }
1292 m_dofMap[firstNode+j]=numDOF+numShared;
1293 }
1294 }
1295 } else if (i1==0 && i2==0) {
1296 // sharing left or right plane
1297 offsetInShared.push_back(offsetInShared.back()+nDOF1*nDOF2);
1298 for (dim_t i=0; i<nDOF2; i++) {
1299 const int firstDOF=(i0==-1 ? i*nDOF0*nDOF1
1300 : nDOF0*(1+i*nDOF1)-1);
1301 const int firstNode=(i0==-1 ?
1302 (bottom+(i+front)*m_N1)*m_N0
1303 : (bottom+1+(i+front)*m_N1)*m_N0-1);
1304 for (dim_t j=0; j<nDOF1; j++, numShared++) {
1305 sendShared.push_back(firstDOF+j*nDOF0);
1306 recvShared.push_back(numDOF+numShared);
1307 if (j>0) {
1308 if (i>0)
1309 colIndices[firstDOF+(j-1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1310 colIndices[firstDOF+(j-1)*nDOF0].push_back(numShared);
1311 if (i<nDOF2-1)
1312 colIndices[firstDOF+(j-1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1313 }
1314 if (i>0)
1315 colIndices[firstDOF+j*nDOF0-nDOF0*nDOF1].push_back(numShared);
1316 colIndices[firstDOF+j*nDOF0].push_back(numShared);
1317 if (i<nDOF2-1)
1318 colIndices[firstDOF+j*nDOF0+nDOF0*nDOF1].push_back(numShared);
1319 if (j<nDOF1-1) {
1320 if (i>0)
1321 colIndices[firstDOF+(j+1)*nDOF0-nDOF0*nDOF1].push_back(numShared);
1322 colIndices[firstDOF+(j+1)*nDOF0].push_back(numShared);
1323 if (i<nDOF2-1)
1324 colIndices[firstDOF+(j+1)*nDOF0+nDOF0*nDOF1].push_back(numShared);
1325 }
1326 m_dofMap[firstNode+j*m_N0]=numDOF+numShared;
1327 }
1328 }
1329 } else if (i0==0) {
1330 // sharing an edge in x direction
1331 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1332 const int firstDOF=(i1+1)/2*nDOF0*(nDOF1-1)
1333 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1334 const int firstNode=(i1+1)/2*m_N0*(m_N1-1)
1335 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1336 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1337 sendShared.push_back(firstDOF+i);
1338 recvShared.push_back(numDOF+numShared);
1339 if (i>0)
1340 colIndices[firstDOF+i-1].push_back(numShared);
1341 colIndices[firstDOF+i].push_back(numShared);
1342 if (i<nDOF0-1)
1343 colIndices[firstDOF+i+1].push_back(numShared);
1344 m_dofMap[firstNode+i]=numDOF+numShared;
1345 }
1346 } else if (i1==0) {
1347 // sharing an edge in y direction
1348 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1349 const int firstDOF=(i0+1)/2*(nDOF0-1)
1350 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1351 const int firstNode=(i0+1)/2*(m_N0-1)
1352 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1353 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1354 sendShared.push_back(firstDOF+i*nDOF0);
1355 recvShared.push_back(numDOF+numShared);
1356 if (i>0)
1357 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1358 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1359 if (i<nDOF1-1)
1360 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1361 m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1362 }
1363 } else if (i2==0) {
1364 // sharing an edge in z direction
1365 offsetInShared.push_back(offsetInShared.back()+nDOF2);
1366 const int firstDOF=(i0+1)/2*(nDOF0-1)
1367 +(i1+1)/2*nDOF0*(nDOF1-1);
1368 const int firstNode=(i0+1)/2*(m_N0-1)
1369 +(i1+1)/2*m_N0*(m_N1-1);
1370 for (dim_t i=0; i<nDOF2; i++, numShared++) {
1371 sendShared.push_back(firstDOF+i*nDOF0*nDOF1);
1372 recvShared.push_back(numDOF+numShared);
1373 if (i>0)
1374 colIndices[firstDOF+(i-1)*nDOF0*nDOF1].push_back(numShared);
1375 colIndices[firstDOF+i*nDOF0*nDOF1].push_back(numShared);
1376 if (i<nDOF2-1)
1377 colIndices[firstDOF+(i+1)*nDOF0*nDOF1].push_back(numShared);
1378 m_dofMap[firstNode+i*m_N0*m_N1]=numDOF+numShared;
1379 }
1380 } else {
1381 // sharing a node
1382 const int dof=(i0+1)/2*(nDOF0-1)
1383 +(i1+1)/2*nDOF0*(nDOF1-1)
1384 +(i2+1)/2*nDOF0*nDOF1*(nDOF2-1);
1385 const int node=(i0+1)/2*(m_N0-1)
1386 +(i1+1)/2*m_N0*(m_N1-1)
1387 +(i2+1)/2*m_N0*m_N1*(m_N2-1);
1388 offsetInShared.push_back(offsetInShared.back()+1);
1389 sendShared.push_back(dof);
1390 recvShared.push_back(numDOF+numShared);
1391 colIndices[dof].push_back(numShared);
1392 m_dofMap[node]=numDOF+numShared;
1393 ++numShared;
1394 }
1395 }
1396 }
1397 }
1398 }
1399
1400 /*
1401 cout << "--- rcv_shcomp ---" << endl;
1402 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1403 for (size_t i=0; i<neighbour.size(); i++) {
1404 cout << "neighbor[" << i << "]=" << neighbour[i]
1405 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1406 }
1407 for (size_t i=0; i<recvShared.size(); i++) {
1408 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1409 }
1410 cout << "--- snd_shcomp ---" << endl;
1411 for (size_t i=0; i<sendShared.size(); i++) {
1412 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1413 }
1414 cout << "--- dofMap ---" << endl;
1415 for (size_t i=0; i<m_dofMap.size(); i++) {
1416 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1417 }
1418 cout << "--- colIndices ---" << endl;
1419 for (size_t i=0; i<colIndices.size(); i++) {
1420 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1421 }
1422 */
1423
1424 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1425 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1426 &offsetInShared[0], 1, 0, m_mpiInfo);
1427 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1428 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1429 &offsetInShared[0], 1, 0, m_mpiInfo);
1430 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1431 Paso_SharedComponents_free(snd_shcomp);
1432 Paso_SharedComponents_free(rcv_shcomp);
1433
1434 // create patterns
1435 dim_t M, N;
1436 IndexVector ptr(1,0);
1437 IndexVector index;
1438
1439 // main pattern
1440 for (index_t i=0; i<numDOF; i++) {
1441 // always add the node itself
1442 index.push_back(i);
1443 const int num=insertNeighbours(index, i);
1444 ptr.push_back(ptr.back()+num+1);
1445 }
1446 M=N=ptr.size()-1;
1447 // paso will manage the memory
1448 index_t* indexC = MEMALLOC(index.size(),index_t);
1449 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1450 copy(index.begin(), index.end(), indexC);
1451 copy(ptr.begin(), ptr.end(), ptrC);
1452 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1453 M, N, ptrC, indexC);
1454
1455 /*
1456 cout << "--- main_pattern ---" << endl;
1457 cout << "M=" << M << ", N=" << N << endl;
1458 for (size_t i=0; i<ptr.size(); i++) {
1459 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1460 }
1461 for (size_t i=0; i<index.size(); i++) {
1462 cout << "index[" << i << "]=" << index[i] << endl;
1463 }
1464 */
1465
1466 // column & row couple patterns
1467 ptr.assign(1, 0);
1468 index.clear();
1469
1470 for (index_t i=0; i<numDOF; i++) {
1471 index.insert(index.end(), colIndices[i].begin(), colIndices[i].end());
1472 ptr.push_back(ptr.back()+colIndices[i].size());
1473 }
1474
1475 // paso will manage the memory
1476 indexC = MEMALLOC(index.size(), index_t);
1477 ptrC = MEMALLOC(ptr.size(), index_t);
1478 copy(index.begin(), index.end(), indexC);
1479 copy(ptr.begin(), ptr.end(), ptrC);
1480 M=ptr.size()-1;
1481 N=numShared;
1482 Paso_Pattern *colCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1483 M, N, ptrC, indexC);
1484
1485 /*
1486 cout << "--- colCouple_pattern ---" << endl;
1487 cout << "M=" << M << ", N=" << N << endl;
1488 for (size_t i=0; i<ptr.size(); i++) {
1489 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1490 }
1491 for (size_t i=0; i<index.size(); i++) {
1492 cout << "index[" << i << "]=" << index[i] << endl;
1493 }
1494 */
1495
1496 // now build the row couple pattern
1497 IndexVector ptr2(1,0);
1498 IndexVector index2;
1499 for (dim_t id=0; id<N; id++) {
1500 dim_t n=0;
1501 for (dim_t i=0; i<M; i++) {
1502 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1503 if (index[j]==id) {
1504 index2.push_back(i);
1505 n++;
1506 break;
1507 }
1508 }
1509 }
1510 ptr2.push_back(ptr2.back()+n);
1511 }
1512
1513 // paso will manage the memory
1514 indexC = MEMALLOC(index2.size(), index_t);
1515 ptrC = MEMALLOC(ptr2.size(), index_t);
1516 copy(index2.begin(), index2.end(), indexC);
1517 copy(ptr2.begin(), ptr2.end(), ptrC);
1518 Paso_Pattern *rowCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
1519 N, M, ptrC, indexC);
1520
1521 /*
1522 cout << "--- rowCouple_pattern ---" << endl;
1523 cout << "M=" << N << ", N=" << M << endl;
1524 for (size_t i=0; i<ptr2.size(); i++) {
1525 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1526 }
1527 for (size_t i=0; i<index2.size(); i++) {
1528 cout << "index[" << i << "]=" << index2[i] << endl;
1529 }
1530 */
1531
1532 // allocate paso distribution
1533 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1534 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1535
1536 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
1537 MATRIX_FORMAT_DEFAULT, distribution, distribution,
1538 mainPattern, colCouplePattern, rowCouplePattern,
1539 connector, connector);
1540 Paso_Pattern_free(mainPattern);
1541 Paso_Pattern_free(colCouplePattern);
1542 Paso_Pattern_free(rowCouplePattern);
1543 Paso_Distribution_free(distribution);
1544 return pattern;
1545 }
1546
1547 void Brick::Print_Mesh_Info(const bool full) const
1548 {
1549 RipleyDomain::Print_Mesh_Info(full);
1550 if (full) {
1551 cout << " Id Coordinates" << endl;
1552 cout.precision(15);
1553 cout.setf(ios::scientific, ios::floatfield);
1554 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1555 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1556 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1557 for (index_t i=0; i < getNumNodes(); i++) {
1558 cout << " " << setw(5) << m_nodeId[i]
1559 << " " << xdx.first+(i%m_N0)*xdx.second
1560 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1561 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1562 }
1563 }
1564 }
1565
1566 IndexVector Brick::getNumNodesPerDim() const
1567 {
1568 IndexVector ret;
1569 ret.push_back(m_N0);
1570 ret.push_back(m_N1);
1571 ret.push_back(m_N2);
1572 return ret;
1573 }
1574
1575 IndexVector Brick::getNumElementsPerDim() const
1576 {
1577 IndexVector ret;
1578 ret.push_back(m_NE0);
1579 ret.push_back(m_NE1);
1580 ret.push_back(m_NE2);
1581 return ret;
1582 }
1583
1584 IndexVector Brick::getNumFacesPerBoundary() const
1585 {
1586 IndexVector ret(6, 0);
1587 //left
1588 if (m_offset0==0)
1589 ret[0]=m_NE1*m_NE2;
1590 //right
1591 if (m_mpiInfo->rank%m_NX==m_NX-1)
1592 ret[1]=m_NE1*m_NE2;
1593 //bottom
1594 if (m_offset1==0)
1595 ret[2]=m_NE0*m_NE2;
1596 //top
1597 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1598 ret[3]=m_NE0*m_NE2;
1599 //front
1600 if (m_offset2==0)
1601 ret[4]=m_NE0*m_NE1;
1602 //back
1603 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1604 ret[5]=m_NE0*m_NE1;
1605 return ret;
1606 }
1607
1608 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1609 {
1610 if (dim==0)
1611 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1612 else if (dim==1)
1613 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1614 else if (dim==2)
1615 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1616
1617 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1618 }
1619
1620 //protected
1621 dim_t Brick::getNumDOF() const
1622 {
1623 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1624 }
1625
1626 //protected
1627 dim_t Brick::getNumFaceElements() const
1628 {
1629 const IndexVector faces = getNumFacesPerBoundary();
1630 dim_t n=0;
1631 for (size_t i=0; i<faces.size(); i++)
1632 n+=faces[i];
1633 return n;
1634 }
1635
1636 //protected
1637 void Brick::assembleCoordinates(escript::Data& arg) const
1638 {
1639 escriptDataC x = arg.getDataC();
1640 int numDim = m_numDim;
1641 if (!isDataPointShapeEqual(&x, 1, &numDim))
1642 throw RipleyException("setToX: Invalid Data object shape");
1643 if (!numSamplesEqual(&x, 1, getNumNodes()))
1644 throw RipleyException("setToX: Illegal number of samples in Data object");
1645
1646 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1647 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1648 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1649 arg.requireWrite();
1650 #pragma omp parallel for
1651 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1652 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1653 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1654 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1655 point[0] = xdx.first+i0*xdx.second;
1656 point[1] = ydy.first+i1*ydy.second;
1657 point[2] = zdz.first+i2*zdz.second;
1658 }
1659 }
1660 }
1661 }
1662
1663 //private
1664 void Brick::populateSampleIds()
1665 {
1666 // identifiers are ordered from left to right, bottom to top, front to back
1667 // globally
1668
1669 // build node distribution vector first.
1670 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1671 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1672 const dim_t numDOF=getNumDOF();
1673 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1674 m_nodeDistribution[k]=k*numDOF;
1675 }
1676 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1677 m_nodeId.resize(getNumNodes());
1678 m_dofId.resize(numDOF);
1679 m_elementId.resize(getNumElements());
1680 m_faceId.resize(getNumFaceElements());
1681
1682 #pragma omp parallel
1683 {
1684 #pragma omp for nowait
1685 // nodes
1686 for (dim_t i2=0; i2<m_N2; i2++) {
1687 for (dim_t i1=0; i1<m_N1; i1++) {
1688 for (dim_t i0=0; i0<m_N0; i0++) {
1689 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1690 (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1691 +(m_offset1+i1)*(m_gNE0+1)
1692 +m_offset0+i0;
1693 }
1694 }
1695 }
1696
1697 // degrees of freedom
1698 #pragma omp for nowait
1699 for (dim_t k=0; k<numDOF; k++)
1700 m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1701
1702 // elements
1703 #pragma omp for nowait
1704 for (dim_t i2=0; i2<m_NE2; i2++) {
1705 for (dim_t i1=0; i1<m_NE1; i1++) {
1706 for (dim_t i0=0; i0<m_NE0; i0++) {
1707 m_elementId[i0+i1*m_NE0+i2*m_NE0*m_NE1] =
1708 (m_offset2+i2)*m_gNE0*m_gNE1
1709 +(m_offset1+i1)*m_gNE0
1710 +m_offset0+i0;
1711 }
1712 }
1713 }
1714
1715 // face elements
1716 #pragma omp for
1717 for (dim_t k=0; k<getNumFaceElements(); k++)
1718 m_faceId[k]=k;
1719 } // end parallel section
1720
1721 m_nodeTags.assign(getNumNodes(), 0);
1722 updateTagsInUse(Nodes);
1723
1724 m_elementTags.assign(getNumElements(), 0);
1725 updateTagsInUse(Elements);
1726
1727 // generate face offset vector and set face tags
1728 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1729 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1730 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1731 m_faceOffset.assign(facesPerEdge.size(), -1);
1732 m_faceTags.clear();
1733 index_t offset=0;
1734 for (size_t i=0; i<facesPerEdge.size(); i++) {
1735 if (facesPerEdge[i]>0) {
1736 m_faceOffset[i]=offset;
1737 offset+=facesPerEdge[i];
1738 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1739 }
1740 }
1741 setTagMap("left", LEFT);
1742 setTagMap("right", RIGHT);
1743 setTagMap("bottom", BOTTOM);
1744 setTagMap("top", TOP);
1745 setTagMap("front", FRONT);
1746 setTagMap("back", BACK);
1747 updateTagsInUse(FaceElements);
1748 }
1749
1750 //private
1751 int Brick::insertNeighbours(IndexVector& index, index_t node) const
1752 {
1753 const index_t nDOF0 = (m_gNE0+1)/m_NX;
1754 const index_t nDOF1 = (m_gNE1+1)/m_NY;
1755 const index_t nDOF2 = (m_gNE2+1)/m_NZ;
1756 const int x=node%nDOF0;
1757 const int y=node%(nDOF0*nDOF1)/nDOF0;
1758 const int z=node/(nDOF0*nDOF1);
1759 int num=0;
1760 // loop through potential neighbours and add to index if positions are
1761 // within bounds
1762 for (int i2=-1; i2<2; i2++) {
1763 for (int i1=-1; i1<2; i1++) {
1764 for (int i0=-1; i0<2; i0++) {
1765 // skip node itself
1766 if (i0==0 && i1==0 && i2==0)
1767 continue;
1768 // location of neighbour node
1769 const int nx=x+i0;
1770 const int ny=y+i1;
1771 const int nz=z+i2;
1772 if (nx>=0 && ny>=0 && nz>=0
1773 && nx<nDOF0 && ny<nDOF1 && nz<nDOF2) {
1774 index.push_back(nz*nDOF0*nDOF1+ny*nDOF0+nx);
1775 num++;
1776 }
1777 }
1778 }
1779 }
1780
1781 return num;
1782 }
1783
1784 //protected
1785 void Brick::addToSystemMatrix(Paso_SystemMatrix* mat,
1786 const IndexVector& nodes_Eq, dim_t num_Eq, const IndexVector& nodes_Sol,
1787 dim_t num_Sol, const vector<double>& array) const
1788 {
1789 const dim_t numMyCols = mat->pattern->mainPattern->numInput;
1790 const dim_t numMyRows = mat->pattern->mainPattern->numOutput;
1791
1792 const index_t* mainBlock_ptr = mat->mainBlock->pattern->ptr;
1793 const index_t* mainBlock_index = mat->mainBlock->pattern->index;
1794 double* mainBlock_val = mat->mainBlock->val;
1795 const index_t* col_coupleBlock_ptr = mat->col_coupleBlock->pattern->ptr;
1796 const index_t* col_coupleBlock_index = mat->col_coupleBlock->pattern->index;
1797 double* col_coupleBlock_val = mat->col_coupleBlock->val;
1798 const index_t* row_coupleBlock_ptr = mat->row_coupleBlock->pattern->ptr;
1799 const index_t* row_coupleBlock_index = mat->row_coupleBlock->pattern->index;
1800 double* row_coupleBlock_val = mat->row_coupleBlock->val;
1801
1802 for (dim_t k_Eq = 0; k_Eq < nodes_Eq.size(); ++k_Eq) {
1803 // down columns of array
1804 const dim_t j_Eq = nodes_Eq[k_Eq];
1805 const dim_t i_row = j_Eq;
1806 //printf("row:%d\n", i_row);
1807 // only look at the matrix rows stored on this processor
1808 if (i_row < numMyRows) {
1809 for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1810 const dim_t i_col = nodes_Sol[k_Sol];
1811 if (i_col < numMyCols) {
1812 for (dim_t k = mainBlock_ptr[i_row]; k < mainBlock_ptr[i_row + 1]; ++k) {
1813 if (mainBlock_index[k] == i_col) {
1814 mainBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1815 break;
1816 }
1817 }
1818 } else {
1819 for (dim_t k = col_coupleBlock_ptr[i_row]; k < col_coupleBlock_ptr[i_row + 1]; ++k) {
1820 //cout << "col:" << i_col-numMyCols << " colIdx:" << col_coupleBlock_index[k] << endl;
1821 if (col_coupleBlock_index[k] == i_col - numMyCols) {
1822 col_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1823 break;
1824 }
1825 }
1826 }
1827 }
1828 } else {
1829 for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1830 // across rows of array
1831 const dim_t i_col = nodes_Sol[k_Sol];
1832 if (i_col < numMyCols) {
1833 for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows];
1834 k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k)
1835 {
1836 //cout << "col:" << i_col << " rowIdx:" << row_coupleBlock_index[k] << endl;
1837 if (row_coupleBlock_index[k] == i_col) {
1838 row_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1839 break;
1840 }
1841 }
1842 }
1843 }
1844 }
1845 }
1846 }
1847
1848 //protected
1849 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1850 bool reduced) const
1851 {
1852 const dim_t numComp = in.getDataPointSize();
1853 if (reduced) {
1854 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1855 const double c0 = .125;
1856 #pragma omp parallel for
1857 for (index_t k2=0; k2 < m_NE2; ++k2) {
1858 for (index_t k1=0; k1 < m_NE1; ++k1) {
1859 for (index_t k0=0; k0 < m_NE0; ++k0) {
1860 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1861 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1862 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1863 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1864 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1865 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1866 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1867 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1868 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1869 for (index_t i=0; i < numComp; ++i) {
1870 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1871 } /* end of component loop i */
1872 } /* end of k0 loop */
1873 } /* end of k1 loop */
1874 } /* end of k2 loop */
1875 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1876 } else {
1877 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1878 const double c0 = .0094373878376559314545;
1879 const double c1 = .035220810900864519624;
1880 const double c2 = .13144585576580214704;
1881 const double c3 = .49056261216234406855;
1882 #pragma omp parallel for
1883 for (index_t k2=0; k2 < m_NE2; ++k2) {
1884 for (index_t k1=0; k1 < m_NE1; ++k1) {
1885 for (index_t k0=0; k0 < m_NE0; ++k0) {
1886 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1887 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1888 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1889 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1890 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1891 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1892 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1893 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1894 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1895 for (index_t i=0; i < numComp; ++i) {
1896 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1897 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1898 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1899 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1900 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1901 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1902 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1903 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i]