/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5986 - (show annotations)
Fri Feb 26 04:10:41 2016 UTC (3 years, 1 month ago) by caltinay
File size: 174689 byte(s)
more test fixes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #define ESNEEDPYTHON
18 #include <esysUtils/first.h>
19
20 #include <ripley/Brick.h>
21 #include <ripley/DefaultAssembler3D.h>
22 #include <ripley/LameAssembler3D.h>
23 #include <ripley/WaveAssembler3D.h>
24 #include <ripley/blocktools.h>
25 #include <ripley/domainhelpers.h>
26
27 #include <escript/FileWriter.h>
28 #include <escript/Random.h>
29
30 #include <paso/SystemMatrix.h>
31
32 #ifdef USE_NETCDF
33 #include <netcdfcpp.h>
34 #endif
35
36 #if USE_SILO
37 #include <silo.h>
38 #ifdef ESYS_MPI
39 #include <pmpio.h>
40 #endif
41 #endif
42
43 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
44 #include <boost/scoped_array.hpp>
45
46 #include <iomanip>
47 #include <limits>
48
49 namespace bp = boost::python;
50 namespace bm = boost::math;
51 using escript::AbstractSystemMatrix;
52 using escript::FileWriter;
53 using escript::NotImplementedError;
54 using escript::ValueError;
55 using std::vector;
56 using std::string;
57 using std::min;
58 using std::max;
59 using std::ios;
60 using std::fill;
61
62 namespace ripley {
63
64 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
65 {
66 if (a > b) {
67 if (c > a) {
68 return 2;
69 }
70 return 0;
71 } else if (b > c) {
72 return 1;
73 }
74 return 2;
75 }
76
77 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
78 double x1, double y1, double z1, int d0, int d1, int d2,
79 const vector<double>& points, const vector<int>& tags,
80 const TagMap& tagnamestonums,
81 escript::SubWorld_ptr w) :
82 RipleyDomain(3, w)
83 {
84 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
85 * static_cast<long>(n2 + 1) > std::numeric_limits<dim_t>::max())
86 throw RipleyException("The number of elements has overflowed, this "
87 "limit may be raised in future releases.");
88
89 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
90 throw ValueError("Number of elements in each spatial dimension "
91 "must be positive");
92
93 // ignore subdivision parameters for serial run
94 if (m_mpiInfo->size == 1) {
95 d0=1;
96 d1=1;
97 d2=1;
98 }
99 bool warn=false;
100
101 vector<int> factors;
102 int ranks = m_mpiInfo->size;
103 dim_t epr[3] = {n0,n1,n2};
104 int d[3] = {d0,d1,d2};
105 if (d0<=0 || d1<=0 || d2<=0) {
106 for (int i = 0; i < 3; i++) {
107 if (d[i] < 1) {
108 d[i] = 1;
109 continue;
110 }
111 epr[i] = -1; // can no longer be max
112 if (ranks % d[i] != 0) {
113 throw ValueError("Invalid number of spatial subdivisions");
114 }
115 //remove
116 ranks /= d[i];
117 }
118 factorise(factors, ranks);
119 if (factors.size() != 0) {
120 warn = true;
121 }
122 }
123 while (factors.size() > 0) {
124 int i = indexOfMax(epr[0],epr[1],epr[2]);
125 int f = factors.back();
126 factors.pop_back();
127 d[i] *= f;
128 epr[i] /= f;
129 }
130 d0 = d[0]; d1 = d[1]; d2 = d[2];
131
132 // ensure number of subdivisions is valid and nodes can be distributed
133 // among number of ranks
134 if (d0*d1*d2 != m_mpiInfo->size){
135 throw ValueError("Invalid number of spatial subdivisions");
136 }
137 if (warn) {
138 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
139 << d1 << ", d2=" << d2 << "). This may not be optimal!" << std::endl;
140 }
141
142 double l0 = x1-x0;
143 double l1 = y1-y0;
144 double l2 = z1-z0;
145 m_dx[0] = l0/n0;
146 m_dx[1] = l1/n1;
147 m_dx[2] = l2/n2;
148
149 if ((n0+1)%d0 > 0) {
150 n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
151 l0=m_dx[0]*n0;
152 std::cout << "Warning: Adjusted number of elements and length. N0="
153 << n0 << ", l0=" << l0 << std::endl;
154 }
155 if ((n1+1)%d1 > 0) {
156 n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
157 l1=m_dx[1]*n1;
158 std::cout << "Warning: Adjusted number of elements and length. N1="
159 << n1 << ", l1=" << l1 << std::endl;
160 }
161 if ((n2+1)%d2 > 0) {
162 n2=(dim_t)round((float)(n2+1)/d2+0.5)*d2-1;
163 l2=m_dx[2]*n2;
164 std::cout << "Warning: Adjusted number of elements and length. N2="
165 << n2 << ", l2=" << l2 << std::endl;
166 }
167
168 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
169 throw ValueError("Too few elements for the number of ranks");
170
171 m_gNE[0] = n0;
172 m_gNE[1] = n1;
173 m_gNE[2] = n2;
174 m_origin[0] = x0;
175 m_origin[1] = y0;
176 m_origin[2] = z0;
177 m_length[0] = l0;
178 m_length[1] = l1;
179 m_length[2] = l2;
180 m_NX[0] = d0;
181 m_NX[1] = d1;
182 m_NX[2] = d2;
183
184 // local number of elements (including overlap)
185 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
186 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
187 m_NE[0]++;
188 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
189 m_ownNE[0]--;
190
191 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
192 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
193 m_NE[1]++;
194 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
195 m_ownNE[1]--;
196
197 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
198 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
199 m_NE[2]++;
200 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
201 m_ownNE[2]--;
202
203 // local number of nodes
204 m_NN[0] = m_NE[0]+1;
205 m_NN[1] = m_NE[1]+1;
206 m_NN[2] = m_NE[2]+1;
207
208 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
209 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
210 if (m_offset[0] > 0)
211 m_offset[0]--;
212 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
213 if (m_offset[1] > 0)
214 m_offset[1]--;
215 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
216 if (m_offset[2] > 0)
217 m_offset[2]--;
218
219 populateSampleIds();
220
221 for (TagMap::const_iterator i = tagnamestonums.begin();
222 i != tagnamestonums.end(); i++) {
223 setTagMap(i->first, i->second);
224 }
225 addPoints(points, tags);
226 }
227
228 Brick::~Brick()
229 {
230 }
231
232 string Brick::getDescription() const
233 {
234 return "ripley::Brick";
235 }
236
237 bool Brick::operator==(const escript::AbstractDomain& other) const
238 {
239 const Brick* o=dynamic_cast<const Brick*>(&other);
240 if (o) {
241 return (RipleyDomain::operator==(other) &&
242 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
243 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
244 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
245 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
246 }
247
248 return false;
249 }
250
251 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
252 const ReaderParameters& params) const
253 {
254 #ifdef USE_NETCDF
255 // check destination function space
256 dim_t myN0, myN1, myN2;
257 if (out.getFunctionSpace().getTypeCode() == Nodes) {
258 myN0 = m_NN[0];
259 myN1 = m_NN[1];
260 myN2 = m_NN[2];
261 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
262 out.getFunctionSpace().getTypeCode() == ReducedElements) {
263 myN0 = m_NE[0];
264 myN1 = m_NE[1];
265 myN2 = m_NE[2];
266 } else
267 throw ValueError("readNcGrid(): invalid function space for output data object");
268
269 if (params.first.size() != 3)
270 throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
271
272 if (params.numValues.size() != 3)
273 throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
274
275 if (params.multiplier.size() != 3)
276 throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
277 for (size_t i=0; i<params.multiplier.size(); i++)
278 if (params.multiplier[i]<1)
279 throw ValueError("readNcGrid(): all multipliers must be positive");
280
281 // check file existence and size
282 NcFile f(filename.c_str(), NcFile::ReadOnly);
283 if (!f.is_valid())
284 throw RipleyException("readNcGrid(): cannot open file");
285
286 NcVar* var = f.get_var(varname.c_str());
287 if (!var)
288 throw RipleyException("readNcGrid(): invalid variable name");
289
290 // TODO: rank>0 data support
291 const int numComp = out.getDataPointSize();
292 if (numComp > 1)
293 throw RipleyException("readNcGrid(): only scalar data supported");
294
295 const int dims = var->num_dims();
296 boost::scoped_array<long> edges(var->edges());
297
298 // is this a slice of the data object (dims!=3)?
299 // note the expected ordering of edges (as in numpy: z,y,x)
300 if ( (dims==3 && (params.numValues[2] > edges[0] ||
301 params.numValues[1] > edges[1] ||
302 params.numValues[0] > edges[2]))
303 || (dims==2 && params.numValues[2]>1)
304 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
305 throw RipleyException("readNcGrid(): not enough data in file");
306 }
307
308 // check if this rank contributes anything
309 if (params.first[0] >= m_offset[0]+myN0 ||
310 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
311 params.first[1] >= m_offset[1]+myN1 ||
312 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
313 params.first[2] >= m_offset[2]+myN2 ||
314 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
315 return;
316 }
317
318 // now determine how much this rank has to write
319
320 // first coordinates in data object to write to
321 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
322 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
323 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
324 // indices to first value in file (not accounting for reverse yet)
325 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
326 dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
327 dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
328 // number of values to read
329 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
330 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
331 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
332
333 // make sure we read the right block if going backwards through file
334 if (params.reverse[0])
335 idx0 = edges[dims-1]-num0-idx0;
336 if (dims>1 && params.reverse[1])
337 idx1 = edges[dims-2]-num1-idx1;
338 if (dims>2 && params.reverse[2])
339 idx2 = edges[dims-3]-num2-idx2;
340
341
342 vector<double> values(num0*num1*num2);
343 if (dims==3) {
344 var->set_cur(idx2, idx1, idx0);
345 var->get(&values[0], num2, num1, num0);
346 } else if (dims==2) {
347 var->set_cur(idx1, idx0);
348 var->get(&values[0], num1, num0);
349 } else {
350 var->set_cur(idx0);
351 var->get(&values[0], num0);
352 }
353
354 const int dpp = out.getNumDataPointsPerSample();
355 out.requireWrite();
356
357 // helpers for reversing
358 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
359 const int x_mult = (params.reverse[0] ? -1 : 1);
360 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
361 const int y_mult = (params.reverse[1] ? -1 : 1);
362 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
363 const int z_mult = (params.reverse[2] ? -1 : 1);
364
365 for (index_t z=0; z<num2; z++) {
366 for (index_t y=0; y<num1; y++) {
367 #pragma omp parallel for
368 for (index_t x=0; x<num0; x++) {
369 const dim_t baseIndex = first0+x*params.multiplier[0]
370 +(first1+y*params.multiplier[1])*myN0
371 +(first2+z*params.multiplier[2])*myN0*myN1;
372 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
373 +(y0+y_mult*y)*num0
374 +(x0+x_mult*x);
375 if (!bm::isnan(values[srcIndex])) {
376 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
377 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
378 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
379 const dim_t dataIndex = baseIndex+m0
380 +m1*myN0
381 +m2*myN0*myN1;
382 double* dest = out.getSampleDataRW(dataIndex);
383 for (index_t q=0; q<dpp; q++) {
384 *dest++ = values[srcIndex];
385 }
386 }
387 }
388 }
389 }
390 }
391 }
392 }
393 #else
394 throw RipleyException("readNcGrid(): not compiled with netCDF support");
395 #endif
396 }
397
398 #ifdef USE_BOOSTIO
399 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
400 const ReaderParameters& params) const
401 {
402 // the mapping is not universally correct but should work on our
403 // supported platforms
404 switch (params.dataType) {
405 case DATATYPE_INT32:
406 readBinaryGridZippedImpl<int>(out, filename, params);
407 break;
408 case DATATYPE_FLOAT32:
409 readBinaryGridZippedImpl<float>(out, filename, params);
410 break;
411 case DATATYPE_FLOAT64:
412 readBinaryGridZippedImpl<double>(out, filename, params);
413 break;
414 default:
415 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
416 }
417 }
418 #endif
419
420 void Brick::readBinaryGrid(escript::Data& out, string filename,
421 const ReaderParameters& params) const
422 {
423 // the mapping is not universally correct but should work on our
424 // supported platforms
425 switch (params.dataType) {
426 case DATATYPE_INT32:
427 readBinaryGridImpl<int>(out, filename, params);
428 break;
429 case DATATYPE_FLOAT32:
430 readBinaryGridImpl<float>(out, filename, params);
431 break;
432 case DATATYPE_FLOAT64:
433 readBinaryGridImpl<double>(out, filename, params);
434 break;
435 default:
436 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
437 }
438 }
439
440 template<typename ValueType>
441 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
442 const ReaderParameters& params) const
443 {
444 // check destination function space
445 dim_t myN0, myN1, myN2;
446 if (out.getFunctionSpace().getTypeCode() == Nodes) {
447 myN0 = m_NN[0];
448 myN1 = m_NN[1];
449 myN2 = m_NN[2];
450 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
451 out.getFunctionSpace().getTypeCode() == ReducedElements) {
452 myN0 = m_NE[0];
453 myN1 = m_NE[1];
454 myN2 = m_NE[2];
455 } else
456 throw ValueError("readBinaryGrid(): invalid function space for output data object");
457
458 if (params.first.size() != 3)
459 throw ValueError("readBinaryGrid(): argument 'first' must have 3 entries");
460
461 if (params.numValues.size() != 3)
462 throw ValueError("readBinaryGrid(): argument 'numValues' must have 3 entries");
463
464 if (params.multiplier.size() != 3)
465 throw ValueError("readBinaryGrid(): argument 'multiplier' must have 3 entries");
466 for (size_t i=0; i<params.multiplier.size(); i++)
467 if (params.multiplier[i]<1)
468 throw ValueError("readBinaryGrid(): all multipliers must be positive");
469 if (params.reverse[0] != 0 || params.reverse[1] != 0)
470 throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
471
472 // check file existence and size
473 std::ifstream f(filename.c_str(), std::ifstream::binary);
474 if (f.fail()) {
475 throw RipleyException("readBinaryGrid(): cannot open file");
476 }
477 f.seekg(0, std::ios::end);
478 const int numComp = out.getDataPointSize();
479 const dim_t filesize = f.tellg();
480 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
481 if (filesize < reqsize) {
482 f.close();
483 throw RipleyException("readBinaryGrid(): not enough data in file");
484 }
485
486 // check if this rank contributes anything
487 if (params.first[0] >= m_offset[0]+myN0 ||
488 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
489 params.first[1] >= m_offset[1]+myN1 ||
490 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
491 params.first[2] >= m_offset[2]+myN2 ||
492 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
493 f.close();
494 return;
495 }
496
497 // now determine how much this rank has to write
498
499 // first coordinates in data object to write to
500 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
501 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
502 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
503 // indices to first value in file (not accounting for reverse yet)
504 dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
505 dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
506 dim_t idx2 = max(dim_t(0), (m_offset[2]/params.multiplier[2])-params.first[2]);
507 // if restX > 0 the first value in the respective dimension has been
508 // written restX times already in a previous rank so this rank only
509 // contributes (multiplier-rank) copies of that value
510 const dim_t rest0 = m_offset[0]%params.multiplier[0];
511 const dim_t rest1 = m_offset[1]%params.multiplier[1];
512 const dim_t rest2 = m_offset[2]%params.multiplier[2];
513
514 // number of values to read
515 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
516 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
517 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
518
519 // make sure we read the right block if going backwards through file
520 if (params.reverse[2])
521 idx2 = params.numValues[2]-idx2-1;
522
523 // helpers for reversing
524 const int z_mult = (params.reverse[2] ? -1 : 1);
525
526 out.requireWrite();
527 vector<ValueType> values(num0*numComp);
528 const int dpp = out.getNumDataPointsPerSample();
529
530 for (dim_t z=0; z<num2; z++) {
531 const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
532 dim_t dataZbase = first2 + z*params.multiplier[2];
533 if (z>0)
534 dataZbase -= rest2;
535
536 for (dim_t y=0; y<num1; y++) {
537 const dim_t fileofs = numComp*(idx0 +
538 (idx1+y)*params.numValues[0] +
539 (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
540 f.seekg(fileofs*sizeof(ValueType));
541 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
542 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
543 dim_t dataYbase = first1 + y*params.multiplier[1];
544 if (y>0)
545 dataYbase -= rest1;
546
547 for (dim_t x=0; x<num0; x++) {
548 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
549 dim_t dataXbase = first0 + x*params.multiplier[0];
550 if (x>0)
551 dataXbase -= rest0;
552 // write a block of mult0 x mult1 x mult2 identical values into
553 // Data object
554 for (dim_t m2=0; m2 < m2limit; m2++) {
555 const dim_t dataZ = dataZbase + m2;
556 if (dataZ >= myN2)
557 break;
558 for (dim_t m1=0; m1 < m1limit; m1++) {
559 const dim_t dataY = dataYbase + m1;
560 if (dataY >= myN1)
561 break;
562 for (dim_t m0=0; m0 < m0limit; m0++) {
563 const dim_t dataX = dataXbase + m0;
564 if (dataX >= myN0)
565 break;
566 const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
567 double* dest = out.getSampleDataRW(dataIndex);
568 for (int c=0; c<numComp; c++) {
569 ValueType val = values[x*numComp+c];
570
571 if (params.byteOrder != BYTEORDER_NATIVE) {
572 char* cval = reinterpret_cast<char*>(&val);
573 // this will alter val!!
574 if (sizeof(ValueType)>4) {
575 byte_swap64(cval);
576 } else {
577 byte_swap32(cval);
578 }
579 }
580 if (!bm::isnan(val)) {
581 for (int q=0; q<dpp; q++) {
582 *dest++ = static_cast<double>(val);
583 }
584 }
585 }
586 }
587 }
588 }
589 }
590 }
591 }
592
593 f.close();
594 }
595
596 #ifdef USE_BOOSTIO
597 template<typename ValueType>
598 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
599 const ReaderParameters& params) const
600 {
601 // check destination function space
602 dim_t myN0, myN1, myN2;
603 if (out.getFunctionSpace().getTypeCode() == Nodes) {
604 myN0 = m_NN[0];
605 myN1 = m_NN[1];
606 myN2 = m_NN[2];
607 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
608 out.getFunctionSpace().getTypeCode() == ReducedElements) {
609 myN0 = m_NE[0];
610 myN1 = m_NE[1];
611 myN2 = m_NE[2];
612 } else
613 throw ValueError("readBinaryGridFromZipped(): invalid function space for output data object");
614
615 if (params.first.size() != 3)
616 throw ValueError("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
617
618 if (params.numValues.size() != 3)
619 throw ValueError("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
620
621 if (params.multiplier.size() != 3)
622 throw ValueError("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
623 for (size_t i=0; i<params.multiplier.size(); i++)
624 if (params.multiplier[i]<1)
625 throw ValueError("readBinaryGridFromZipped(): all multipliers must be positive");
626
627 // check file existence and size
628 std::ifstream f(filename.c_str(), std::ifstream::binary);
629 if (f.fail()) {
630 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
631 }
632 f.seekg(0, std::ios::end);
633 const int numComp = out.getDataPointSize();
634 dim_t filesize = f.tellg();
635 f.seekg(0, ios::beg);
636 vector<char> compressed(filesize);
637 f.read((char*)&compressed[0], filesize);
638 f.close();
639 vector<char> decompressed = unzip(compressed);
640 filesize = decompressed.size();
641 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
642 if (filesize < reqsize) {
643 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
644 }
645
646 // check if this rank contributes anything
647 if (params.first[0] >= m_offset[0]+myN0 ||
648 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
649 params.first[1] >= m_offset[1]+myN1 ||
650 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
651 params.first[2] >= m_offset[2]+myN2 ||
652 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
653 return;
654 }
655
656 // now determine how much this rank has to write
657
658 // first coordinates in data object to write to
659 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
660 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
661 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
662 // indices to first value in file
663 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
664 const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
665 const dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
666 // number of values to read
667 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
668 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
669 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
670
671 out.requireWrite();
672 vector<ValueType> values(num0*numComp);
673 const int dpp = out.getNumDataPointsPerSample();
674
675 for (dim_t z=0; z<num2; z++) {
676 for (dim_t y=0; y<num1; y++) {
677 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
678 +(idx2+z)*params.numValues[0]*params.numValues[1]);
679 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
680
681 for (dim_t x=0; x<num0; x++) {
682 const dim_t baseIndex = first0+x*params.multiplier[0]
683 +(first1+y*params.multiplier[1])*myN0
684 +(first2+z*params.multiplier[2])*myN0*myN1;
685 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
686 for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
687 for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
688 const dim_t dataIndex = baseIndex+m0
689 +m1*myN0
690 +m2*myN0*myN1;
691 double* dest = out.getSampleDataRW(dataIndex);
692 for (int c=0; c<numComp; c++) {
693 ValueType val = values[x*numComp+c];
694
695 if (params.byteOrder != BYTEORDER_NATIVE) {
696 char* cval = reinterpret_cast<char*>(&val);
697 // this will alter val!!
698 byte_swap32(cval);
699 }
700 if (!bm::isnan(val)) {
701 for (int q=0; q<dpp; q++) {
702 *dest++ = static_cast<double>(val);
703 }
704 }
705 }
706 }
707 }
708 }
709 }
710 }
711 }
712 }
713 #endif
714
715 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
716 int byteOrder, int dataType) const
717 {
718 // the mapping is not universally correct but should work on our
719 // supported platforms
720 switch (dataType) {
721 case DATATYPE_INT32:
722 writeBinaryGridImpl<int>(in, filename, byteOrder);
723 break;
724 case DATATYPE_FLOAT32:
725 writeBinaryGridImpl<float>(in, filename, byteOrder);
726 break;
727 case DATATYPE_FLOAT64:
728 writeBinaryGridImpl<double>(in, filename, byteOrder);
729 break;
730 default:
731 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
732 }
733 }
734
735 template<typename ValueType>
736 void Brick::writeBinaryGridImpl(const escript::Data& in,
737 const string& filename, int byteOrder) const
738 {
739 // check function space and determine number of points
740 dim_t myN0, myN1, myN2;
741 dim_t totalN0, totalN1, totalN2;
742 dim_t offset0, offset1, offset2;
743 if (in.getFunctionSpace().getTypeCode() == Nodes) {
744 myN0 = m_NN[0];
745 myN1 = m_NN[1];
746 myN2 = m_NN[2];
747 totalN0 = m_gNE[0]+1;
748 totalN1 = m_gNE[1]+1;
749 totalN2 = m_gNE[2]+1;
750 offset0 = m_offset[0];
751 offset1 = m_offset[1];
752 offset2 = m_offset[2];
753 } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
754 in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
755 myN0 = (m_gNE[0]+1)/m_NX[0];
756 myN1 = (m_gNE[1]+1)/m_NX[1];
757 myN2 = (m_gNE[2]+1)/m_NX[2];
758 totalN0 = m_gNE[0]+1;
759 totalN1 = m_gNE[1]+1;
760 totalN2 = m_gNE[2]+1;
761 offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
762 offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
763 offset2 = (m_offset[2]>0 ? m_offset[2]+1 : 0);
764 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
765 in.getFunctionSpace().getTypeCode() == ReducedElements) {
766 myN0 = m_NE[0];
767 myN1 = m_NE[1];
768 myN2 = m_NE[2];
769 totalN0 = m_gNE[0];
770 totalN1 = m_gNE[1];
771 totalN2 = m_gNE[2];
772 offset0 = m_offset[0];
773 offset1 = m_offset[1];
774 offset2 = m_offset[2];
775 } else
776 throw RipleyException("writeBinaryGrid(): unsupported function space");
777
778 const int numComp = in.getDataPointSize();
779 const int dpp = in.getNumDataPointsPerSample();
780 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
781
782 if (numComp > 1 || dpp > 1)
783 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
784
785 // from here on we know that each sample consists of one value
786 FileWriter fw(m_mpiInfo->comm);
787 fw.openFile(filename, fileSize, true);
788 MPIBarrier();
789
790 for (index_t z=0; z<myN2; z++) {
791 for (index_t y=0; y<myN1; y++) {
792 const dim_t fileofs = (offset0+(offset1+y)*totalN0
793 +(offset2+z)*totalN0*totalN1)*sizeof(ValueType);
794 std::ostringstream oss;
795
796 for (index_t x=0; x<myN0; x++) {
797 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
798 ValueType fvalue = static_cast<ValueType>(*sample);
799 if (byteOrder == BYTEORDER_NATIVE) {
800 oss.write((char*)&fvalue, sizeof(fvalue));
801 } else {
802 char* value = reinterpret_cast<char*>(&fvalue);
803 if (sizeof(fvalue)>4) {
804 byte_swap64(value);
805 } else {
806 byte_swap32(value);
807 }
808 oss.write(value, sizeof(fvalue));
809 }
810 }
811 fw.writeAt(oss, fileofs);
812 }
813 }
814 fw.close();
815 }
816
817 void Brick::write(const std::string& filename) const
818 {
819 throw RipleyException("write: not supported");
820 }
821
822 void Brick::dump(const string& fileName) const
823 {
824 #if USE_SILO
825 string fn(fileName);
826 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
827 fn+=".silo";
828 }
829
830 int driver=DB_HDF5;
831 string siloPath;
832 DBfile* dbfile = NULL;
833
834 #ifdef ESYS_MPI
835 PMPIO_baton_t* baton = NULL;
836 const int NUM_SILO_FILES = 1;
837 const char* blockDirFmt = "/block%04d";
838 #endif
839
840 if (m_mpiInfo->size > 1) {
841 #ifdef ESYS_MPI
842 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
843 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
844 PMPIO_DefaultClose, (void*)&driver);
845 // try the fallback driver in case of error
846 if (!baton && driver != DB_PDB) {
847 driver = DB_PDB;
848 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
849 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
850 PMPIO_DefaultClose, (void*)&driver);
851 }
852 if (baton) {
853 char str[64];
854 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
855 siloPath = str;
856 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
857 }
858 #endif
859 } else {
860 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
861 getDescription().c_str(), driver);
862 // try the fallback driver in case of error
863 if (!dbfile && driver != DB_PDB) {
864 driver = DB_PDB;
865 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
866 getDescription().c_str(), driver);
867 }
868 }
869
870 if (!dbfile)
871 throw RipleyException("dump: Could not create Silo file");
872
873 /*
874 if (driver==DB_HDF5) {
875 // gzip level 1 already provides good compression with minimal
876 // performance penalty. Some tests showed that gzip levels >3 performed
877 // rather badly on escript data both in terms of time and space
878 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
879 }
880 */
881
882 boost::scoped_ptr<double> x(new double[m_NN[0]]);
883 boost::scoped_ptr<double> y(new double[m_NN[1]]);
884 boost::scoped_ptr<double> z(new double[m_NN[2]]);
885 double* coords[3] = { x.get(), y.get(), z.get() };
886 const dim_t NN0 = m_NN[0];
887 const dim_t NN1 = m_NN[1];
888 const dim_t NN2 = m_NN[2];
889
890 #pragma omp parallel
891 {
892 #pragma omp for
893 for (dim_t i0 = 0; i0 < NN0; i0++) {
894 coords[0][i0]=getLocalCoordinate(i0, 0);
895 }
896 #pragma omp for
897 for (dim_t i1 = 0; i1 < NN1; i1++) {
898 coords[1][i1]=getLocalCoordinate(i1, 1);
899 }
900 #pragma omp for
901 for (dim_t i2 = 0; i2 < NN2; i2++) {
902 coords[2][i2]=getLocalCoordinate(i2, 2);
903 }
904 }
905 // converting to int!
906 vector<int> dims(m_NN, m_NN+3);
907
908 // write mesh
909 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
910 DB_COLLINEAR, NULL);
911
912 // write node ids
913 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
914 NULL, 0, DB_INT, DB_NODECENT, NULL);
915
916 // write element ids
917 dims.assign(m_NE, m_NE+3);
918 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
919 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
920
921 // rank 0 writes multimesh and multivar
922 if (m_mpiInfo->rank == 0) {
923 vector<string> tempstrings;
924 vector<char*> names;
925 for (dim_t i=0; i<m_mpiInfo->size; i++) {
926 std::stringstream path;
927 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
928 tempstrings.push_back(path.str());
929 names.push_back((char*)tempstrings.back().c_str());
930 }
931 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
932 DBSetDir(dbfile, "/");
933 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
934 &types[0], NULL);
935 tempstrings.clear();
936 names.clear();
937 for (dim_t i=0; i<m_mpiInfo->size; i++) {
938 std::stringstream path;
939 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
940 tempstrings.push_back(path.str());
941 names.push_back((char*)tempstrings.back().c_str());
942 }
943 types.assign(m_mpiInfo->size, DB_QUADVAR);
944 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
945 &types[0], NULL);
946 tempstrings.clear();
947 names.clear();
948 for (dim_t i=0; i<m_mpiInfo->size; i++) {
949 std::stringstream path;
950 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
951 tempstrings.push_back(path.str());
952 names.push_back((char*)tempstrings.back().c_str());
953 }
954 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
955 &types[0], NULL);
956 }
957
958 if (m_mpiInfo->size > 1) {
959 #ifdef ESYS_MPI
960 PMPIO_HandOffBaton(baton, dbfile);
961 PMPIO_Finish(baton);
962 #endif
963 } else {
964 DBClose(dbfile);
965 }
966
967 #else // USE_SILO
968 throw RipleyException("dump: no Silo support");
969 #endif
970 }
971
972 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
973 {
974 switch (fsType) {
975 case Nodes:
976 case ReducedNodes: //FIXME: reduced
977 return &m_nodeId[0];
978 case DegreesOfFreedom:
979 case ReducedDegreesOfFreedom: //FIXME: reduced
980 return &m_dofId[0];
981 case Elements:
982 case ReducedElements:
983 return &m_elementId[0];
984 case FaceElements:
985 case ReducedFaceElements:
986 return &m_faceId[0];
987 case Points:
988 return &m_diracPointNodeIDs[0];
989 default:
990 break;
991 }
992
993 std::stringstream msg;
994 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
995 throw ValueError(msg.str());
996 }
997
998 bool Brick::ownSample(int fsType, index_t id) const
999 {
1000 if (getMPISize()==1)
1001 return true;
1002
1003 switch (fsType) {
1004 case Nodes:
1005 case ReducedNodes: //FIXME: reduced
1006 return (m_dofMap[id] < getNumDOF());
1007 case DegreesOfFreedom:
1008 case ReducedDegreesOfFreedom:
1009 return true;
1010 case Elements:
1011 case ReducedElements:
1012 {
1013 // check ownership of element's _last_ node
1014 const index_t x=id%m_NE[0] + 1;
1015 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
1016 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
1017 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
1018 }
1019 case FaceElements:
1020 case ReducedFaceElements:
1021 {
1022 // check ownership of face element's last node
1023 dim_t n=0;
1024 for (size_t i=0; i<6; i++) {
1025 n+=m_faceCount[i];
1026 if (id<n) {
1027 const index_t j=id-n+m_faceCount[i];
1028 if (i>=4) { // front or back
1029 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
1030 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
1031 } else if (i>=2) { // bottom or top
1032 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1033 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1034 } else { // left or right
1035 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1036 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1037 }
1038 }
1039 }
1040 return false;
1041 }
1042 default:
1043 break;
1044 }
1045
1046 std::stringstream msg;
1047 msg << "ownSample: invalid function space type " << fsType;
1048 throw ValueError(msg.str());
1049 }
1050
1051 RankVector Brick::getOwnerVector(int fsType) const
1052 {
1053 RankVector owner;
1054 const int rank = m_mpiInfo->rank;
1055
1056 if (fsType == Elements || fsType == ReducedElements) {
1057 owner.assign(getNumElements(), rank);
1058 // shared plane in Y-Z
1059 if (m_faceCount[0]==0) {
1060 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1061 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1062 const dim_t e=k2*m_NE[0]*m_NE[1]+k1*m_NE[0];
1063 owner[e] = rank-1;
1064 }
1065 }
1066 }
1067 // shared plane in X-Z
1068 if (m_faceCount[2]==0) {
1069 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1070 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1071 const dim_t e=k2*m_NE[0]*m_NE[1]+k0;
1072 owner[e] = rank-m_NX[0];
1073 }
1074 }
1075 }
1076 // shared plane in X-Y
1077 if (m_faceCount[4]==0) {
1078 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1079 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1080 const dim_t e=k1*m_NE[0]+k0;
1081 owner[e] = rank-m_NX[0]*m_NX[1];
1082 }
1083 }
1084 }
1085
1086 } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
1087 owner.assign(getNumFaceElements(), rank);
1088 index_t offset=0;
1089 if (m_faceCount[0] > 0) {
1090 if (m_faceCount[2]==0) {
1091 for (dim_t k2=0; k2<m_NE[2]; k2++)
1092 owner[k2*m_NE[1]] = rank-m_NX[0];
1093 }
1094 if (m_faceCount[4]==0) {
1095 for (dim_t k1=0; k1<m_NE[1]; k1++)
1096 owner[k1] = rank-m_NX[0]*m_NX[1];
1097 }
1098 offset += m_faceCount[0];
1099 }
1100 if (m_faceCount[1] > 0) {
1101 if (m_faceCount[2]==0) {
1102 for (dim_t k2=0; k2<m_NE[2]; k2++)
1103 owner[offset+k2*m_NE[1]] = rank-m_NX[0];
1104 }
1105 if (m_faceCount[4]==0) {
1106 for (dim_t k1=0; k1<m_NE[1]; k1++)
1107 owner[offset+k1] = rank-m_NX[0]*m_NX[1];
1108 }
1109 offset += m_faceCount[1];
1110 }
1111 if (m_faceCount[2] > 0) {
1112 if (m_faceCount[0]==0) {
1113 for (dim_t k2=0; k2<m_NE[2]; k2++)
1114 owner[offset+k2*m_NE[0]] = rank-1;
1115 }
1116 if (m_faceCount[4]==0) {
1117 for (dim_t k0=0; k0<m_NE[0]; k0++)
1118 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1119 }
1120 offset += m_faceCount[2];
1121 }
1122 if (m_faceCount[3] > 0) {
1123 if (m_faceCount[0]==0) {
1124 for (dim_t k2=0; k2<m_NE[2]; k2++)
1125 owner[offset+k2*m_NE[0]] = rank-1;
1126 }
1127 if (m_faceCount[4]==0) {
1128 for (dim_t k0=0; k0<m_NE[0]; k0++)
1129 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1130 }
1131 offset += m_faceCount[3];
1132 }
1133 if (m_faceCount[4] > 0) {
1134 if (m_faceCount[0]==0) {
1135 for (dim_t k1=0; k1<m_NE[1]; k1++)
1136 owner[offset+k1*m_NE[0]] = rank-1;
1137 }
1138 if (m_faceCount[2]==0) {
1139 for (dim_t k0=0; k0<m_NE[0]; k0++)
1140 owner[offset+k0] = rank-m_NX[0];
1141 }
1142 offset += m_faceCount[4];
1143 }
1144 if (m_faceCount[5] > 0) {
1145 if (m_faceCount[0]==0) {
1146 for (dim_t k1=0; k1<m_NE[1]; k1++)
1147 owner[offset+k1*m_NE[0]] = rank-1;
1148 }
1149 if (m_faceCount[2]==0) {
1150 for (dim_t k0=0; k0<m_NE[0]; k0++)
1151 owner[offset+k0] = rank-m_NX[0];
1152 }
1153 }
1154 } else {
1155 throw ValueError("getOwnerVector: only valid for element types");
1156 }
1157
1158 return owner;
1159 }
1160
1161 void Brick::setToNormal(escript::Data& out) const
1162 {
1163 const dim_t NE0 = m_NE[0];
1164 const dim_t NE1 = m_NE[1];
1165 const dim_t NE2 = m_NE[2];
1166
1167 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1168 out.requireWrite();
1169 #pragma omp parallel
1170 {
1171 if (m_faceOffset[0] > -1) {
1172 #pragma omp for nowait
1173 for (index_t k2 = 0; k2 < NE2; ++k2) {
1174 for (index_t k1 = 0; k1 < NE1; ++k1) {
1175 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1176 // set vector at four quadrature points
1177 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1178 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1179 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1180 *o++ = -1.; *o++ = 0.; *o = 0.;
1181 }
1182 }
1183 }
1184
1185 if (m_faceOffset[1] > -1) {
1186 #pragma omp for nowait
1187 for (index_t k2 = 0; k2 < NE2; ++k2) {
1188 for (index_t k1 = 0; k1 < NE1; ++k1) {
1189 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1190 // set vector at four quadrature points
1191 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1192 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1193 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1194 *o++ = 1.; *o++ = 0.; *o = 0.;
1195 }
1196 }
1197 }
1198
1199 if (m_faceOffset[2] > -1) {
1200 #pragma omp for nowait
1201 for (index_t k2 = 0; k2 < NE2; ++k2) {
1202 for (index_t k0 = 0; k0 < NE0; ++k0) {
1203 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1204 // set vector at four quadrature points
1205 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1206 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1207 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1208 *o++ = 0.; *o++ = -1.; *o = 0.;
1209 }
1210 }
1211 }
1212
1213 if (m_faceOffset[3] > -1) {
1214 #pragma omp for nowait
1215 for (index_t k2 = 0; k2 < NE2; ++k2) {
1216 for (index_t k0 = 0; k0 < NE0; ++k0) {
1217 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1218 // set vector at four quadrature points
1219 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1220 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1221 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1222 *o++ = 0.; *o++ = 1.; *o = 0.;
1223 }
1224 }
1225 }
1226
1227 if (m_faceOffset[4] > -1) {
1228 #pragma omp for nowait
1229 for (index_t k1 = 0; k1 < NE1; ++k1) {
1230 for (index_t k0 = 0; k0 < NE0; ++k0) {
1231 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1232 // set vector at four quadrature points
1233 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1234 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1235 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1236 *o++ = 0.; *o++ = 0.; *o = -1.;
1237 }
1238 }
1239 }
1240
1241 if (m_faceOffset[5] > -1) {
1242 #pragma omp for nowait
1243 for (index_t k1 = 0; k1 < NE1; ++k1) {
1244 for (index_t k0 = 0; k0 < NE0; ++k0) {
1245 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1246 // set vector at four quadrature points
1247 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1248 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1249 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1250 *o++ = 0.; *o++ = 0.; *o = 1.;
1251 }
1252 }
1253 }
1254 } // end of parallel section
1255 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1256 out.requireWrite();
1257 #pragma omp parallel
1258 {
1259 if (m_faceOffset[0] > -1) {
1260 #pragma omp for nowait
1261 for (index_t k2 = 0; k2 < NE2; ++k2) {
1262 for (index_t k1 = 0; k1 < NE1; ++k1) {
1263 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1264 *o++ = -1.;
1265 *o++ = 0.;
1266 *o = 0.;
1267 }
1268 }
1269 }
1270
1271 if (m_faceOffset[1] > -1) {
1272 #pragma omp for nowait
1273 for (index_t k2 = 0; k2 < NE2; ++k2) {
1274 for (index_t k1 = 0; k1 < NE1; ++k1) {
1275 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1276 *o++ = 1.;
1277 *o++ = 0.;
1278 *o = 0.;
1279 }
1280 }
1281 }
1282
1283 if (m_faceOffset[2] > -1) {
1284 #pragma omp for nowait
1285 for (index_t k2 = 0; k2 < NE2; ++k2) {
1286 for (index_t k0 = 0; k0 < NE0; ++k0) {
1287 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1288 *o++ = 0.;
1289 *o++ = -1.;
1290 *o = 0.;
1291 }
1292 }
1293 }
1294
1295 if (m_faceOffset[3] > -1) {
1296 #pragma omp for nowait
1297 for (index_t k2 = 0; k2 < NE2; ++k2) {
1298 for (index_t k0 = 0; k0 < NE0; ++k0) {
1299 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1300 *o++ = 0.;
1301 *o++ = 1.;
1302 *o = 0.;
1303 }
1304 }
1305 }
1306
1307 if (m_faceOffset[4] > -1) {
1308 #pragma omp for nowait
1309 for (index_t k1 = 0; k1 < NE1; ++k1) {
1310 for (index_t k0 = 0; k0 < NE0; ++k0) {
1311 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1312 *o++ = 0.;
1313 *o++ = 0.;
1314 *o = -1.;
1315 }
1316 }
1317 }
1318
1319 if (m_faceOffset[5] > -1) {
1320 #pragma omp for nowait
1321 for (index_t k1 = 0; k1 < NE1; ++k1) {
1322 for (index_t k0 = 0; k0 < NE0; ++k0) {
1323 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1324 *o++ = 0.;
1325 *o++ = 0.;
1326 *o = 1.;
1327 }
1328 }
1329 }
1330 } // end of parallel section
1331
1332 } else {
1333 std::stringstream msg;
1334 msg << "setToNormal: invalid function space type "
1335 << out.getFunctionSpace().getTypeCode();
1336 throw ValueError(msg.str());
1337 }
1338 }
1339
1340 void Brick::setToSize(escript::Data& out) const
1341 {
1342 if (out.getFunctionSpace().getTypeCode() == Elements
1343 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1344 out.requireWrite();
1345 const dim_t numQuad=out.getNumDataPointsPerSample();
1346 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1347 const dim_t NE = getNumElements();
1348 #pragma omp parallel for
1349 for (index_t k = 0; k < NE; ++k) {
1350 double* o = out.getSampleDataRW(k);
1351 fill(o, o+numQuad, size);
1352 }
1353 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1354 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1355 out.requireWrite();
1356 const dim_t numQuad=out.getNumDataPointsPerSample();
1357 const dim_t NE0 = m_NE[0];
1358 const dim_t NE1 = m_NE[1];
1359 const dim_t NE2 = m_NE[2];
1360 #pragma omp parallel
1361 {
1362 if (m_faceOffset[0] > -1) {
1363 const double size=min(m_dx[1],m_dx[2]);
1364 #pragma omp for nowait
1365 for (index_t k2 = 0; k2 < NE2; ++k2) {
1366 for (index_t k1 = 0; k1 < NE1; ++k1) {
1367 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1368 fill(o, o+numQuad, size);
1369 }
1370 }
1371 }
1372
1373 if (m_faceOffset[1] > -1) {
1374 const double size=min(m_dx[1],m_dx[2]);
1375 #pragma omp for nowait
1376 for (index_t k2 = 0; k2 < NE2; ++k2) {
1377 for (index_t k1 = 0; k1 < NE1; ++k1) {
1378 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1379 fill(o, o+numQuad, size);
1380 }
1381 }
1382 }
1383
1384 if (m_faceOffset[2] > -1) {
1385 const double size=min(m_dx[0],m_dx[2]);
1386 #pragma omp for nowait
1387 for (index_t k2 = 0; k2 < NE2; ++k2) {
1388 for (index_t k0 = 0; k0 < NE0; ++k0) {
1389 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1390 fill(o, o+numQuad, size);
1391 }
1392 }
1393 }
1394
1395 if (m_faceOffset[3] > -1) {
1396 const double size=min(m_dx[0],m_dx[2]);
1397 #pragma omp for nowait
1398 for (index_t k2 = 0; k2 < NE2; ++k2) {
1399 for (index_t k0 = 0; k0 < NE0; ++k0) {
1400 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1401 fill(o, o+numQuad, size);
1402 }
1403 }
1404 }
1405
1406 if (m_faceOffset[4] > -1) {
1407 const double size=min(m_dx[0],m_dx[1]);
1408 #pragma omp for nowait
1409 for (index_t k1 = 0; k1 < NE1; ++k1) {
1410 for (index_t k0 = 0; k0 < NE0; ++k0) {
1411 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1412 fill(o, o+numQuad, size);
1413 }
1414 }
1415 }
1416
1417 if (m_faceOffset[5] > -1) {
1418 const double size=min(m_dx[0],m_dx[1]);
1419 #pragma omp for nowait
1420 for (index_t k1 = 0; k1 < NE1; ++k1) {
1421 for (index_t k0 = 0; k0 < NE0; ++k0) {
1422 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1423 fill(o, o+numQuad, size);
1424 }
1425 }
1426 }
1427 } // end of parallel section
1428
1429 } else {
1430 std::stringstream msg;
1431 msg << "setToSize: invalid function space type "
1432 << out.getFunctionSpace().getTypeCode();
1433 throw ValueError(msg.str());
1434 }
1435 }
1436
1437 void Brick::Print_Mesh_Info(const bool full) const
1438 {
1439 RipleyDomain::Print_Mesh_Info(full);
1440 if (full) {
1441 std::cout << " Id Coordinates" << std::endl;
1442 std::cout.precision(15);
1443 std::cout.setf(ios::scientific, std::ios::floatfield);
1444 for (index_t i=0; i < getNumNodes(); i++) {
1445 std::cout << " " << std::setw(5) << m_nodeId[i]
1446 << " " << getLocalCoordinate(i%m_NN[0], 0)
1447 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1448 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << std::endl;
1449 }
1450 }
1451 }
1452
1453
1454 //protected
1455 void Brick::assembleCoordinates(escript::Data& arg) const
1456 {
1457 int numDim = m_numDim;
1458 if (!arg.isDataPointShapeEqual(1, &numDim))
1459 throw ValueError("setToX: Invalid Data object shape");
1460 if (!arg.numSamplesEqual(1, getNumNodes()))
1461 throw ValueError("setToX: Illegal number of samples in Data object");
1462
1463 const dim_t NN0 = m_NN[0];
1464 const dim_t NN1 = m_NN[1];
1465 const dim_t NN2 = m_NN[2];
1466 arg.requireWrite();
1467 #pragma omp parallel for
1468 for (dim_t i2 = 0; i2 < NN2; i2++) {
1469 for (dim_t i1 = 0; i1 < NN1; i1++) {
1470 for (dim_t i0 = 0; i0 < NN0; i0++) {
1471 double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1472 point[0] = getLocalCoordinate(i0, 0);
1473 point[1] = getLocalCoordinate(i1, 1);
1474 point[2] = getLocalCoordinate(i2, 2);
1475 }
1476 }
1477 }
1478 }
1479
1480 //protected
1481 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1482 {
1483 const dim_t numComp = in.getDataPointSize();
1484 const double C0 = .044658198738520451079;
1485 const double C1 = .16666666666666666667;
1486 const double C2 = .21132486540518711775;
1487 const double C3 = .25;
1488 const double C4 = .5;
1489 const double C5 = .62200846792814621559;
1490 const double C6 = .78867513459481288225;
1491 const dim_t NE0 = m_NE[0];
1492 const dim_t NE1 = m_NE[1];
1493 const dim_t NE2 = m_NE[2];
1494
1495 if (out.getFunctionSpace().getTypeCode() == Elements) {
1496 out.requireWrite();
1497 #pragma omp parallel
1498 {
1499 vector<double> f_000(numComp);
1500 vector<double> f_001(numComp);
1501 vector<double> f_010(numComp);
1502 vector<double> f_011(numComp);
1503 vector<double> f_100(numComp);
1504 vector<double> f_101(numComp);
1505 vector<double> f_110(numComp);
1506 vector<double> f_111(numComp);
1507 #pragma omp for
1508 for (index_t k2=0; k2 < NE2; ++k2) {
1509 for (index_t k1=0; k1 < NE1; ++k1) {
1510 for (index_t k0=0; k0 < NE0; ++k0) {
1511 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1517 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1520 for (index_t i=0; i < numComp; ++i) {
1521 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1522 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1523 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1524 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1525 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1526 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1527 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1528 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1529 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1530 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1531 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1532 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1533 o[INDEX3(i,0,0,numComp,3)] = V0;
1534 o[INDEX3(i,1,0,numComp,3)] = V4;
1535 o[INDEX3(i,2,0,numComp,3)] = V8;
1536 o[INDEX3(i,0,1,numComp,3)] = V0;
1537 o[INDEX3(i,1,1,numComp,3)] = V5;
1538 o[INDEX3(i,2,1,numComp,3)] = V9;
1539 o[INDEX3(i,0,2,numComp,3)] = V1;
1540 o[INDEX3(i,1,2,numComp,3)] = V4;
1541 o[INDEX3(i,2,2,numComp,3)] = V10;
1542 o[INDEX3(i,0,3,numComp,3)] = V1;
1543 o[INDEX3(i,1,3,numComp,3)] = V5;
1544 o[INDEX3(i,2,3,numComp,3)] = V11;
1545 o[INDEX3(i,0,4,numComp,3)] = V2;
1546 o[INDEX3(i,1,4,numComp,3)] = V6;
1547 o[INDEX3(i,2,4,numComp,3)] = V8;
1548 o[INDEX3(i,0,5,numComp,3)] = V2;
1549 o[INDEX3(i,1,5,numComp,3)] = V7;
1550 o[INDEX3(i,2,5,numComp,3)] = V9;
1551 o[INDEX3(i,0,6,numComp,3)] = V3;
1552 o[INDEX3(i,1,6,numComp,3)] = V6;
1553 o[INDEX3(i,2,6,numComp,3)] = V10;
1554 o[INDEX3(i,0,7,numComp,3)] = V3;
1555 o[INDEX3(i,1,7,numComp,3)] = V7;
1556 o[INDEX3(i,2,7,numComp,3)] = V11;
1557 } // end of component loop i
1558 } // end of k0 loop
1559 } // end of k1 loop
1560 } // end of k2 loop
1561 } // end of parallel section
1562 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1563 out.requireWrite();
1564 #pragma omp parallel
1565 {
1566 vector<double> f_000(numComp);
1567 vector<double> f_001(numComp);
1568 vector<double> f_010(numComp);
1569 vector<double> f_011(numComp);
1570 vector<double> f_100(numComp);
1571 vector<double> f_101(numComp);
1572 vector<double> f_110(numComp);
1573 vector<double> f_111(numComp);
1574 #pragma omp for
1575 for (index_t k2=0; k2 < NE2; ++k2) {
1576 for (index_t k1=0; k1 < NE1; ++k1) {
1577 for (index_t k0=0; k0 < NE0; ++k0) {
1578 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1583 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1584 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1585 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1586 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1587 for (index_t i=0; i < numComp; ++i) {
1588 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1589 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1590 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1591 } // end of component loop i
1592 } // end of k0 loop
1593 } // end of k1 loop
1594 } // end of k2 loop
1595 } // end of parallel section
1596 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1597 out.requireWrite();
1598 #pragma omp parallel
1599 {
1600 vector<double> f_000(numComp);
1601 vector<double> f_001(numComp);
1602 vector<double> f_010(numComp);
1603 vector<double> f_011(numComp);
1604 vector<double> f_100(numComp);
1605 vector<double> f_101(numComp);
1606 vector<double> f_110(numComp);
1607 vector<double> f_111(numComp);
1608 if (m_faceOffset[0] > -1) {
1609 #pragma omp for nowait
1610 for (index_t k2=0; k2 < NE2; ++k2) {
1611 for (index_t k1=0; k1 < NE1; ++k1) {
1612 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1620 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1621 for (index_t i=0; i < numComp; ++i) {
1622 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1623 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1624 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1625 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1626 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1627 o[INDEX3(i,1,0,numComp,3)] = V0;
1628 o[INDEX3(i,2,0,numComp,3)] = V2;
1629 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1630 o[INDEX3(i,1,1,numComp,3)] = V0;
1631 o[INDEX3(i,2,1,numComp,3)] = V3;
1632 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1633 o[INDEX3(i,1,2,numComp,3)] = V1;
1634 o[INDEX3(i,2,2,numComp,3)] = V2;
1635 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1636 o[INDEX3(i,1,3,numComp,3)] = V1;
1637 o[INDEX3(i,2,3,numComp,3)] = V3;
1638 } // end of component loop i
1639 } // end of k1 loop
1640 } // end of k2 loop
1641 } // end of face 0
1642 if (m_faceOffset[1] > -1) {
1643 #pragma omp for nowait
1644 for (index_t k2=0; k2 < NE2; ++k2) {
1645 for (index_t k1=0; k1 < NE1; ++k1) {
1646 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1647 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1648 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1649 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1650 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1651 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1652 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1653 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1654 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1655 for (index_t i=0; i < numComp; ++i) {
1656 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1657 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1658 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1659 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1660 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1661 o[INDEX3(i,1,0,numComp,3)] = V0;
1662 o[INDEX3(i,2,0,numComp,3)] = V2;
1663 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1664 o[INDEX3(i,1,1,numComp,3)] = V0;
1665 o[INDEX3(i,2,1,numComp,3)] = V3;
1666 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1667 o[INDEX3(i,1,2,numComp,3)] = V1;
1668 o[INDEX3(i,2,2,numComp,3)] = V2;
1669 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1670 o[INDEX3(i,1,3,numComp,3)] = V1;
1671 o[INDEX3(i,2,3,numComp,3)] = V3;
1672 } // end of component loop i
1673 } // end of k1 loop
1674 } // end of k2 loop
1675 } // end of face 1
1676 if (m_faceOffset[2] > -1) {
1677 #pragma omp for nowait
1678 for (index_t k2=0; k2 < NE2; ++k2) {
1679 for (index_t k0=0; k0 < NE0; ++k0) {
1680 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1681 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1682 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1683 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1684 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1685 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1686 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1687 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1688 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1689 for (index_t i=0; i < numComp; ++i) {
1690 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1691 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1692 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1693 o[INDEX3(i,0,0,numComp,3)] = V0;
1694 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1695 o[INDEX3(i,2,0,numComp,3)] = V1;
1696 o[INDEX3(i,0,1,numComp,3)] = V0;
1697 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1698 o[INDEX3(i,2,1,numComp,3)] = V2;
1699 o[INDEX3(i,0,2,numComp,3)] = V0;
1700 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1701 o[INDEX3(i,2,2,numComp,3)] = V1;
1702 o[INDEX3(i,0,3,numComp,3)] = V0;
1703 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1704 o[INDEX3(i,2,3,numComp,3)] = V2;
1705 } // end of component loop i
1706 } // end of k0 loop
1707 } // end of k2 loop
1708 } // end of face 2
1709 if (m_faceOffset[3] > -1) {
1710 #pragma omp for nowait
1711 for (index_t k2=0; k2 < NE2; ++k2) {
1712 for (index_t k0=0; k0 < NE0; ++k0) {
1713 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1714 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1715 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1716 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1717 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1718 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1721 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1722 for (index_t i=0; i < numComp; ++i) {
1723 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1724 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1725 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1726 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1727 o[INDEX3(i,0,0,numComp,3)] = V0;
1728 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1729 o[INDEX3(i,2,0,numComp,3)] = V2;
1730 o[INDEX3(i,0,1,numComp,3)] = V0;
1731 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1732 o[INDEX3(i,2,1,numComp,3)] = V3;
1733 o[INDEX3(i,0,2,numComp,3)] = V1;
1734 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1735 o[INDEX3(i,2,2,numComp,3)] = V2;
1736 o[INDEX3(i,0,3,numComp,3)] = V1;
1737 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1738 o[INDEX3(i,2,3,numComp,3)] = V3;
1739 } // end of component loop i
1740 } // end of k0 loop
1741 } // end of k2 loop
1742 } // end of face 3
1743 if (m_faceOffset[4] > -1) {
1744 #pragma omp for nowait
1745 for (index_t k1=0; k1 < NE1; ++k1) {
1746 for (index_t k0=0; k0 < NE0; ++k0) {
1747 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1748 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1749 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1750 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1751 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1752 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1753 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1754 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1755 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1756 for (index_t i=0; i < numComp; ++i) {
1757 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1758 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1759 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1760 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1761 o[INDEX3(i,0,0,numComp,3)] = V0;
1762 o[INDEX3(i,1,0,numComp,3)] = V2;
1763 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1764 o[INDEX3(i,0,1,numComp,3)] = V0;
1765 o[INDEX3(i,1,1,numComp,3)] = V3;
1766 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1767 o[INDEX3(i,0,2,numComp,3)] = V1;
1768 o[INDEX3(i,1,2,numComp,3)] = V2;
1769 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1770 o[INDEX3(i,0,3,numComp,3)] = V1;
1771 o[INDEX3(i,1,3,numComp,3)] = V3;
1772 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1773 } // end of component loop i
1774 } // end of k0 loop
1775 } // end of k1 loop
1776 } // end of face 4
1777 if (m_faceOffset[5] > -1) {
1778 #pragma omp for nowait
1779 for (index_t k1=0; k1 < NE1; ++k1) {
1780 for (index_t k0=0; k0 < NE0; ++k0) {
1781 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1782 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1783 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1784 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1785 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1786 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1787 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1788 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1789 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0));
1790 for (index_t i=0; i < numComp; ++i) {
1791 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1792 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1793 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1794 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1795 o[INDEX3(i,0,0,numComp,3)] = V0;
1796 o[INDEX3(i,1,0,numComp,3)] = V2;
1797 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1798 o[INDEX3(i,0,1,numComp,3)] = V0;
1799 o[INDEX3(i,1,1,numComp,3)] = V3;
1800 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1801 o[INDEX3(i,0,2,numComp,3)] = V1;
1802 o[INDEX3(i,1,2,numComp,3)] = V2;
1803 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1804 o[INDEX3(i,0,3,numComp,3)] = V1;
1805 o[INDEX3(i,1,3,numComp,3)] = V3;
1806 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1807 } // end of component loop i
1808 } // end of k0 loop
1809 } // end of k1 loop
1810 } // end of face 5
1811 } // end of parallel section
1812 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1813 out.requireWrite();
1814 #pragma omp parallel
1815 {
1816 vector<double> f_000(numComp);
1817 vector<double> f_001(numComp);
1818 vector<double> f_010(numComp);
1819 vector<double> f_011(numComp);
1820 vector<double> f_100(numComp);
1821 vector<double> f_101(numComp);
1822 vector<double> f_110(numComp);
1823 vector<double> f_111(numComp);
1824 if (m_faceOffset[0] > -1) {
1825 #pragma omp for nowait
1826 for (index_t k2=0; k2 < NE2; ++k2) {
1827 for (index_t k1=0; k1 < NE1; ++k1) {
1828 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1829 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1830 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1831 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1832 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1833 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1834 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1835 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1836 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1837 for (index_t i=0; i < numComp; ++i) {
1838 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1839 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1840 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1841 } // end of component loop i
1842 } // end of k1 loop
1843 } // end of k2 loop
1844 } // end of face 0
1845 if (m_faceOffset[1] > -1) {
1846 #pragma omp for nowait
1847 for (index_t k2=0; k2 < NE2; ++k2) {
1848 for (index_t k1=0; k1 < NE1; ++k1) {
1849 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1850 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1851 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1852 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1853 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1854 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1855 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1856 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1857 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1858 for (index_t i=0; i < numComp; ++i) {
1859 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1860 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1861 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1862 } // end of component loop i
1863 } // end of k1 loop
1864 } // end of k2 loop
1865 } // end of face 1
1866 if (m_faceOffset[2] > -1) {
1867 #pragma omp for nowait
1868 for (index_t k2=0; k2 < NE2; ++k2) {
1869 for (index_t k0=0; k0 < NE0; ++k0) {
1870 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1871 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1872 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1873 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1874 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1875 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1876 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1877 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1878 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1879 for (index_t i=0; i < numComp; ++i) {
1880 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1881 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1882 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1883 } // end of component loop i
1884 } // end of k0 loop
1885 } // end of k2 loop
1886 } // end of face 2
1887 if (m_faceOffset[3] > -1) {
1888 #pragma omp for nowait
1889 for (index_t k2=0; k2 < NE2; ++k2) {
1890 for (index_t k0=0; k0 < NE0; ++k0) {
1891 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1892 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1893 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1894 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1895 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1896 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1897 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1898 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1899 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1900 for (index_t i=0; i < numComp; ++i) {
1901 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1902 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / <