/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6141 - (show annotations)
Wed Apr 6 03:51:30 2016 UTC (2 years, 8 months ago) by caltinay
File size: 176538 byte(s)
more namespacing of defines.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <ripley/DefaultAssembler3D.h>
19 #include <ripley/LameAssembler3D.h>
20 #include <ripley/WaveAssembler3D.h>
21 #include <ripley/blocktools.h>
22 #include <ripley/domainhelpers.h>
23
24 #include <escript/FileWriter.h>
25 #include <escript/Random.h>
26
27 #include <paso/SystemMatrix.h>
28
29 #ifdef USE_NETCDF
30 #include <netcdfcpp.h>
31 #endif
32
33 #if USE_SILO
34 #include <silo.h>
35 #ifdef ESYS_MPI
36 #include <pmpio.h>
37 #endif
38 #endif
39
40 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
41 #include <boost/scoped_array.hpp>
42
43 #include <iomanip>
44 #include <limits>
45
46 namespace bp = boost::python;
47 namespace bm = boost::math;
48 using escript::AbstractSystemMatrix;
49 using escript::FileWriter;
50 using escript::NotImplementedError;
51 using escript::ValueError;
52 using std::vector;
53 using std::string;
54 using std::min;
55 using std::max;
56 using std::ios;
57 using std::fill;
58
59 namespace ripley {
60
61 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
62 {
63 if (a > b) {
64 if (c > a) {
65 return 2;
66 }
67 return 0;
68 } else if (b > c) {
69 return 1;
70 }
71 return 2;
72 }
73
74 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
75 double x1, double y1, double z1, int d0, int d1, int d2,
76 const vector<double>& points, const vector<int>& tags,
77 const TagMap& tagnamestonums,
78 escript::SubWorld_ptr w) :
79 RipleyDomain(3, w)
80 {
81 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
82 * static_cast<long>(n2 + 1) > std::numeric_limits<dim_t>::max())
83 throw RipleyException("The number of elements has overflowed, this "
84 "limit may be raised in future releases.");
85
86 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
87 throw ValueError("Number of elements in each spatial dimension "
88 "must be positive");
89
90 // ignore subdivision parameters for serial run
91 if (m_mpiInfo->size == 1) {
92 d0=1;
93 d1=1;
94 d2=1;
95 }
96 bool warn=false;
97
98 vector<int> factors;
99 int ranks = m_mpiInfo->size;
100 dim_t epr[3] = {n0,n1,n2};
101 int d[3] = {d0,d1,d2};
102 if (d0<=0 || d1<=0 || d2<=0) {
103 for (int i = 0; i < 3; i++) {
104 if (d[i] < 1) {
105 d[i] = 1;
106 continue;
107 }
108 epr[i] = -1; // can no longer be max
109 if (ranks % d[i] != 0) {
110 throw ValueError("Invalid number of spatial subdivisions");
111 }
112 //remove
113 ranks /= d[i];
114 }
115 factorise(factors, ranks);
116 if (factors.size() != 0) {
117 warn = true;
118 }
119 }
120 while (factors.size() > 0) {
121 int i = indexOfMax(epr[0],epr[1],epr[2]);
122 int f = factors.back();
123 factors.pop_back();
124 d[i] *= f;
125 epr[i] /= f;
126 }
127 d0 = d[0]; d1 = d[1]; d2 = d[2];
128
129 // ensure number of subdivisions is valid and nodes can be distributed
130 // among number of ranks
131 if (d0*d1*d2 != m_mpiInfo->size){
132 throw ValueError("Invalid number of spatial subdivisions");
133 }
134 if (warn) {
135 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
136 << d1 << ", d2=" << d2 << "). This may not be optimal!" << std::endl;
137 }
138
139 double l0 = x1-x0;
140 double l1 = y1-y0;
141 double l2 = z1-z0;
142 m_dx[0] = l0/n0;
143 m_dx[1] = l1/n1;
144 m_dx[2] = l2/n2;
145
146 if ((n0+1)%d0 > 0) {
147 n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
148 l0=m_dx[0]*n0;
149 std::cout << "Warning: Adjusted number of elements and length. N0="
150 << n0 << ", l0=" << l0 << std::endl;
151 }
152 if ((n1+1)%d1 > 0) {
153 n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
154 l1=m_dx[1]*n1;
155 std::cout << "Warning: Adjusted number of elements and length. N1="
156 << n1 << ", l1=" << l1 << std::endl;
157 }
158 if ((n2+1)%d2 > 0) {
159 n2=(dim_t)round((float)(n2+1)/d2+0.5)*d2-1;
160 l2=m_dx[2]*n2;
161 std::cout << "Warning: Adjusted number of elements and length. N2="
162 << n2 << ", l2=" << l2 << std::endl;
163 }
164
165 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
166 throw ValueError("Too few elements for the number of ranks");
167
168 m_gNE[0] = n0;
169 m_gNE[1] = n1;
170 m_gNE[2] = n2;
171 m_origin[0] = x0;
172 m_origin[1] = y0;
173 m_origin[2] = z0;
174 m_length[0] = l0;
175 m_length[1] = l1;
176 m_length[2] = l2;
177 m_NX[0] = d0;
178 m_NX[1] = d1;
179 m_NX[2] = d2;
180
181 // local number of elements (including overlap)
182 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
183 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
184 m_NE[0]++;
185 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
186 m_ownNE[0]--;
187
188 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
189 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
190 m_NE[1]++;
191 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
192 m_ownNE[1]--;
193
194 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
195 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
196 m_NE[2]++;
197 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
198 m_ownNE[2]--;
199
200 // local number of nodes
201 m_NN[0] = m_NE[0]+1;
202 m_NN[1] = m_NE[1]+1;
203 m_NN[2] = m_NE[2]+1;
204
205 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
206 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
207 if (m_offset[0] > 0)
208 m_offset[0]--;
209 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
210 if (m_offset[1] > 0)
211 m_offset[1]--;
212 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
213 if (m_offset[2] > 0)
214 m_offset[2]--;
215
216 populateSampleIds();
217
218 for (TagMap::const_iterator i = tagnamestonums.begin();
219 i != tagnamestonums.end(); i++) {
220 setTagMap(i->first, i->second);
221 }
222 addPoints(points, tags);
223 }
224
225 Brick::~Brick()
226 {
227 }
228
229 string Brick::getDescription() const
230 {
231 return "ripley::Brick";
232 }
233
234 bool Brick::operator==(const escript::AbstractDomain& other) const
235 {
236 const Brick* o=dynamic_cast<const Brick*>(&other);
237 if (o) {
238 return (RipleyDomain::operator==(other) &&
239 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
240 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
241 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
242 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
243 }
244
245 return false;
246 }
247
248 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
249 const ReaderParameters& params) const
250 {
251 #ifdef USE_NETCDF
252 // check destination function space
253 dim_t myN0, myN1, myN2;
254 if (out.getFunctionSpace().getTypeCode() == Nodes) {
255 myN0 = m_NN[0];
256 myN1 = m_NN[1];
257 myN2 = m_NN[2];
258 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
259 out.getFunctionSpace().getTypeCode() == ReducedElements) {
260 myN0 = m_NE[0];
261 myN1 = m_NE[1];
262 myN2 = m_NE[2];
263 } else
264 throw ValueError("readNcGrid(): invalid function space for output data object");
265
266 if (params.first.size() != 3)
267 throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
268
269 if (params.numValues.size() != 3)
270 throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
271
272 if (params.multiplier.size() != 3)
273 throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
274 for (size_t i=0; i<params.multiplier.size(); i++)
275 if (params.multiplier[i]<1)
276 throw ValueError("readNcGrid(): all multipliers must be positive");
277
278 // check file existence and size
279 NcFile f(filename.c_str(), NcFile::ReadOnly);
280 if (!f.is_valid())
281 throw RipleyException("readNcGrid(): cannot open file");
282
283 NcVar* var = f.get_var(varname.c_str());
284 if (!var)
285 throw RipleyException("readNcGrid(): invalid variable name");
286
287 // TODO: rank>0 data support
288 const int numComp = out.getDataPointSize();
289 if (numComp > 1)
290 throw RipleyException("readNcGrid(): only scalar data supported");
291
292 const int dims = var->num_dims();
293 boost::scoped_array<long> edges(var->edges());
294
295 // is this a slice of the data object (dims!=3)?
296 // note the expected ordering of edges (as in numpy: z,y,x)
297 if ( (dims==3 && (params.numValues[2] > edges[0] ||
298 params.numValues[1] > edges[1] ||
299 params.numValues[0] > edges[2]))
300 || (dims==2 && params.numValues[2]>1)
301 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
302 throw RipleyException("readNcGrid(): not enough data in file");
303 }
304
305 // check if this rank contributes anything
306 if (params.first[0] >= m_offset[0]+myN0 ||
307 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
308 params.first[1] >= m_offset[1]+myN1 ||
309 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
310 params.first[2] >= m_offset[2]+myN2 ||
311 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
312 return;
313 }
314
315 // now determine how much this rank has to write
316
317 // first coordinates in data object to write to
318 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
319 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
320 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
321 // indices to first value in file (not accounting for reverse yet)
322 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
323 dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
324 dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
325 // number of values to read
326 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
327 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
328 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
329
330 // make sure we read the right block if going backwards through file
331 if (params.reverse[0])
332 idx0 = edges[dims-1]-num0-idx0;
333 if (dims>1 && params.reverse[1])
334 idx1 = edges[dims-2]-num1-idx1;
335 if (dims>2 && params.reverse[2])
336 idx2 = edges[dims-3]-num2-idx2;
337
338
339 vector<double> values(num0*num1*num2);
340 if (dims==3) {
341 var->set_cur(idx2, idx1, idx0);
342 var->get(&values[0], num2, num1, num0);
343 } else if (dims==2) {
344 var->set_cur(idx1, idx0);
345 var->get(&values[0], num1, num0);
346 } else {
347 var->set_cur(idx0);
348 var->get(&values[0], num0);
349 }
350
351 const int dpp = out.getNumDataPointsPerSample();
352 out.requireWrite();
353
354 // helpers for reversing
355 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
356 const int x_mult = (params.reverse[0] ? -1 : 1);
357 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
358 const int y_mult = (params.reverse[1] ? -1 : 1);
359 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
360 const int z_mult = (params.reverse[2] ? -1 : 1);
361
362 for (index_t z=0; z<num2; z++) {
363 for (index_t y=0; y<num1; y++) {
364 #pragma omp parallel for
365 for (index_t x=0; x<num0; x++) {
366 const dim_t baseIndex = first0+x*params.multiplier[0]
367 +(first1+y*params.multiplier[1])*myN0
368 +(first2+z*params.multiplier[2])*myN0*myN1;
369 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
370 +(y0+y_mult*y)*num0
371 +(x0+x_mult*x);
372 if (!bm::isnan(values[srcIndex])) {
373 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
374 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
375 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
376 const dim_t dataIndex = baseIndex+m0
377 +m1*myN0
378 +m2*myN0*myN1;
379 double* dest = out.getSampleDataRW(dataIndex);
380 for (index_t q=0; q<dpp; q++) {
381 *dest++ = values[srcIndex];
382 }
383 }
384 }
385 }
386 }
387 }
388 }
389 }
390 #else
391 throw RipleyException("readNcGrid(): not compiled with netCDF support");
392 #endif
393 }
394
395 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
396 const ReaderParameters& params) const
397 {
398 #ifdef ESYS_HAVE_BOOST_IO
399 // the mapping is not universally correct but should work on our
400 // supported platforms
401 switch (params.dataType) {
402 case DATATYPE_INT32:
403 readBinaryGridZippedImpl<int>(out, filename, params);
404 break;
405 case DATATYPE_FLOAT32:
406 readBinaryGridZippedImpl<float>(out, filename, params);
407 break;
408 case DATATYPE_FLOAT64:
409 readBinaryGridZippedImpl<double>(out, filename, params);
410 break;
411 default:
412 throw ValueError("readBinaryGridZipped(): invalid or unsupported datatype");
413 }
414 #else
415 throw RipleyException("readBinaryGridZipped(): not compiled with zip support");
416 #endif
417 }
418
419 void Brick::readBinaryGrid(escript::Data& out, string filename,
420 const ReaderParameters& params) const
421 {
422 // the mapping is not universally correct but should work on our
423 // supported platforms
424 switch (params.dataType) {
425 case DATATYPE_INT32:
426 readBinaryGridImpl<int>(out, filename, params);
427 break;
428 case DATATYPE_FLOAT32:
429 readBinaryGridImpl<float>(out, filename, params);
430 break;
431 case DATATYPE_FLOAT64:
432 readBinaryGridImpl<double>(out, filename, params);
433 break;
434 default:
435 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
436 }
437 }
438
439 template<typename ValueType>
440 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
441 const ReaderParameters& params) const
442 {
443 // check destination function space
444 dim_t myN0, myN1, myN2;
445 if (out.getFunctionSpace().getTypeCode() == Nodes) {
446 myN0 = m_NN[0];
447 myN1 = m_NN[1];
448 myN2 = m_NN[2];
449 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
450 out.getFunctionSpace().getTypeCode() == ReducedElements) {
451 myN0 = m_NE[0];
452 myN1 = m_NE[1];
453 myN2 = m_NE[2];
454 } else
455 throw ValueError("readBinaryGrid(): invalid function space for output data object");
456
457 if (params.first.size() != 3)
458 throw ValueError("readBinaryGrid(): argument 'first' must have 3 entries");
459
460 if (params.numValues.size() != 3)
461 throw ValueError("readBinaryGrid(): argument 'numValues' must have 3 entries");
462
463 if (params.multiplier.size() != 3)
464 throw ValueError("readBinaryGrid(): argument 'multiplier' must have 3 entries");
465 for (size_t i=0; i<params.multiplier.size(); i++)
466 if (params.multiplier[i]<1)
467 throw ValueError("readBinaryGrid(): all multipliers must be positive");
468 if (params.reverse[0] != 0 || params.reverse[1] != 0)
469 throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
470
471 // check file existence and size
472 std::ifstream f(filename.c_str(), std::ifstream::binary);
473 if (f.fail()) {
474 throw RipleyException("readBinaryGrid(): cannot open file");
475 }
476 f.seekg(0, std::ios::end);
477 const int numComp = out.getDataPointSize();
478 const dim_t filesize = f.tellg();
479 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
480 if (filesize < reqsize) {
481 f.close();
482 throw RipleyException("readBinaryGrid(): not enough data in file");
483 }
484
485 // check if this rank contributes anything
486 if (params.first[0] >= m_offset[0]+myN0 ||
487 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
488 params.first[1] >= m_offset[1]+myN1 ||
489 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
490 params.first[2] >= m_offset[2]+myN2 ||
491 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
492 f.close();
493 return;
494 }
495
496 // now determine how much this rank has to write
497
498 // first coordinates in data object to write to
499 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
500 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
501 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
502 // indices to first value in file (not accounting for reverse yet)
503 dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
504 dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
505 dim_t idx2 = max(dim_t(0), (m_offset[2]/params.multiplier[2])-params.first[2]);
506 // if restX > 0 the first value in the respective dimension has been
507 // written restX times already in a previous rank so this rank only
508 // contributes (multiplier-rank) copies of that value
509 const dim_t rest0 = m_offset[0]%params.multiplier[0];
510 const dim_t rest1 = m_offset[1]%params.multiplier[1];
511 const dim_t rest2 = m_offset[2]%params.multiplier[2];
512
513 // number of values to read
514 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
515 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
516 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
517
518 // make sure we read the right block if going backwards through file
519 if (params.reverse[2])
520 idx2 = params.numValues[2]-idx2-1;
521
522 // helpers for reversing
523 const int z_mult = (params.reverse[2] ? -1 : 1);
524
525 out.requireWrite();
526 vector<ValueType> values(num0*numComp);
527 const int dpp = out.getNumDataPointsPerSample();
528
529 for (dim_t z=0; z<num2; z++) {
530 const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
531 dim_t dataZbase = first2 + z*params.multiplier[2];
532 if (z>0)
533 dataZbase -= rest2;
534
535 for (dim_t y=0; y<num1; y++) {
536 const dim_t fileofs = numComp*(idx0 +
537 (idx1+y)*params.numValues[0] +
538 (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
539 f.seekg(fileofs*sizeof(ValueType));
540 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
541 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
542 dim_t dataYbase = first1 + y*params.multiplier[1];
543 if (y>0)
544 dataYbase -= rest1;
545
546 for (dim_t x=0; x<num0; x++) {
547 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
548 dim_t dataXbase = first0 + x*params.multiplier[0];
549 if (x>0)
550 dataXbase -= rest0;
551 // write a block of mult0 x mult1 x mult2 identical values into
552 // Data object
553 for (dim_t m2=0; m2 < m2limit; m2++) {
554 const dim_t dataZ = dataZbase + m2;
555 if (dataZ >= myN2)
556 break;
557 for (dim_t m1=0; m1 < m1limit; m1++) {
558 const dim_t dataY = dataYbase + m1;
559 if (dataY >= myN1)
560 break;
561 for (dim_t m0=0; m0 < m0limit; m0++) {
562 const dim_t dataX = dataXbase + m0;
563 if (dataX >= myN0)
564 break;
565 const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
566 double* dest = out.getSampleDataRW(dataIndex);
567 for (int c=0; c<numComp; c++) {
568 ValueType val = values[x*numComp+c];
569
570 if (params.byteOrder != BYTEORDER_NATIVE) {
571 char* cval = reinterpret_cast<char*>(&val);
572 // this will alter val!!
573 if (sizeof(ValueType)>4) {
574 byte_swap64(cval);
575 } else {
576 byte_swap32(cval);
577 }
578 }
579 if (!bm::isnan(val)) {
580 for (int q=0; q<dpp; q++) {
581 *dest++ = static_cast<double>(val);
582 }
583 }
584 }
585 }
586 }
587 }
588 }
589 }
590 }
591
592 f.close();
593 }
594
595 #ifdef ESYS_HAVE_BOOST_IO
596 template<typename ValueType>
597 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
598 const ReaderParameters& params) const
599 {
600 // check destination function space
601 dim_t myN0, myN1, myN2;
602 if (out.getFunctionSpace().getTypeCode() == Nodes) {
603 myN0 = m_NN[0];
604 myN1 = m_NN[1];
605 myN2 = m_NN[2];
606 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
607 out.getFunctionSpace().getTypeCode() == ReducedElements) {
608 myN0 = m_NE[0];
609 myN1 = m_NE[1];
610 myN2 = m_NE[2];
611 } else
612 throw ValueError("readBinaryGridFromZipped(): invalid function space for output data object");
613
614 if (params.first.size() != 3)
615 throw ValueError("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
616
617 if (params.numValues.size() != 3)
618 throw ValueError("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
619
620 if (params.multiplier.size() != 3)
621 throw ValueError("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
622 for (size_t i=0; i<params.multiplier.size(); i++)
623 if (params.multiplier[i]<1)
624 throw ValueError("readBinaryGridFromZipped(): all multipliers must be positive");
625
626 // check file existence and size
627 std::ifstream f(filename.c_str(), std::ifstream::binary);
628 if (f.fail()) {
629 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
630 }
631 f.seekg(0, std::ios::end);
632 const int numComp = out.getDataPointSize();
633 dim_t filesize = f.tellg();
634 f.seekg(0, ios::beg);
635 vector<char> compressed(filesize);
636 f.read((char*)&compressed[0], filesize);
637 f.close();
638 vector<char> decompressed = unzip(compressed);
639 filesize = decompressed.size();
640 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
641 if (filesize < reqsize) {
642 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
643 }
644
645 // check if this rank contributes anything
646 if (params.first[0] >= m_offset[0]+myN0 ||
647 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
648 params.first[1] >= m_offset[1]+myN1 ||
649 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
650 params.first[2] >= m_offset[2]+myN2 ||
651 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
652 return;
653 }
654
655 // now determine how much this rank has to write
656
657 // first coordinates in data object to write to
658 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
659 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
660 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
661 // indices to first value in file
662 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
663 const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
664 const dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
665 // number of values to read
666 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
667 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
668 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
669
670 out.requireWrite();
671 vector<ValueType> values(num0*numComp);
672 const int dpp = out.getNumDataPointsPerSample();
673
674 for (dim_t z=0; z<num2; z++) {
675 for (dim_t y=0; y<num1; y++) {
676 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
677 +(idx2+z)*params.numValues[0]*params.numValues[1]);
678 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
679
680 for (dim_t x=0; x<num0; x++) {
681 const dim_t baseIndex = first0+x*params.multiplier[0]
682 +(first1+y*params.multiplier[1])*myN0
683 +(first2+z*params.multiplier[2])*myN0*myN1;
684 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
685 for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
686 for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
687 const dim_t dataIndex = baseIndex+m0
688 +m1*myN0
689 +m2*myN0*myN1;
690 double* dest = out.getSampleDataRW(dataIndex);
691 for (int c=0; c<numComp; c++) {
692 ValueType val = values[x*numComp+c];
693
694 if (params.byteOrder != BYTEORDER_NATIVE) {
695 char* cval = reinterpret_cast<char*>(&val);
696 // this will alter val!!
697 byte_swap32(cval);
698 }
699 if (!bm::isnan(val)) {
700 for (int q=0; q<dpp; q++) {
701 *dest++ = static_cast<double>(val);
702 }
703 }
704 }
705 }
706 }
707 }
708 }
709 }
710 }
711 }
712 #endif
713
714 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
715 int byteOrder, int dataType) const
716 {
717 // the mapping is not universally correct but should work on our
718 // supported platforms
719 switch (dataType) {
720 case DATATYPE_INT32:
721 writeBinaryGridImpl<int>(in, filename, byteOrder);
722 break;
723 case DATATYPE_FLOAT32:
724 writeBinaryGridImpl<float>(in, filename, byteOrder);
725 break;
726 case DATATYPE_FLOAT64:
727 writeBinaryGridImpl<double>(in, filename, byteOrder);
728 break;
729 default:
730 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
731 }
732 }
733
734 template<typename ValueType>
735 void Brick::writeBinaryGridImpl(const escript::Data& in,
736 const string& filename, int byteOrder) const
737 {
738 // check function space and determine number of points
739 dim_t myN0, myN1, myN2;
740 dim_t totalN0, totalN1, totalN2;
741 dim_t offset0, offset1, offset2;
742 if (in.getFunctionSpace().getTypeCode() == Nodes) {
743 myN0 = m_NN[0];
744 myN1 = m_NN[1];
745 myN2 = m_NN[2];
746 totalN0 = m_gNE[0]+1;
747 totalN1 = m_gNE[1]+1;
748 totalN2 = m_gNE[2]+1;
749 offset0 = m_offset[0];
750 offset1 = m_offset[1];
751 offset2 = m_offset[2];
752 } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
753 in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
754 myN0 = (m_gNE[0]+1)/m_NX[0];
755 myN1 = (m_gNE[1]+1)/m_NX[1];
756 myN2 = (m_gNE[2]+1)/m_NX[2];
757 totalN0 = m_gNE[0]+1;
758 totalN1 = m_gNE[1]+1;
759 totalN2 = m_gNE[2]+1;
760 offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
761 offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
762 offset2 = (m_offset[2]>0 ? m_offset[2]+1 : 0);
763 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
764 in.getFunctionSpace().getTypeCode() == ReducedElements) {
765 myN0 = m_NE[0];
766 myN1 = m_NE[1];
767 myN2 = m_NE[2];
768 totalN0 = m_gNE[0];
769 totalN1 = m_gNE[1];
770 totalN2 = m_gNE[2];
771 offset0 = m_offset[0];
772 offset1 = m_offset[1];
773 offset2 = m_offset[2];
774 } else
775 throw RipleyException("writeBinaryGrid(): unsupported function space");
776
777 const int numComp = in.getDataPointSize();
778 const int dpp = in.getNumDataPointsPerSample();
779 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
780
781 if (numComp > 1 || dpp > 1)
782 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
783
784 // from here on we know that each sample consists of one value
785 FileWriter fw(m_mpiInfo->comm);
786 fw.openFile(filename, fileSize, true);
787 MPIBarrier();
788
789 for (index_t z=0; z<myN2; z++) {
790 for (index_t y=0; y<myN1; y++) {
791 const dim_t fileofs = (offset0+(offset1+y)*totalN0
792 +(offset2+z)*totalN0*totalN1)*sizeof(ValueType);
793 std::ostringstream oss;
794
795 for (index_t x=0; x<myN0; x++) {
796 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
797 ValueType fvalue = static_cast<ValueType>(*sample);
798 if (byteOrder == BYTEORDER_NATIVE) {
799 oss.write((char*)&fvalue, sizeof(fvalue));
800 } else {
801 char* value = reinterpret_cast<char*>(&fvalue);
802 if (sizeof(fvalue)>4) {
803 byte_swap64(value);
804 } else {
805 byte_swap32(value);
806 }
807 oss.write(value, sizeof(fvalue));
808 }
809 }
810 fw.writeAt(oss, fileofs);
811 }
812 }
813 fw.close();
814 }
815
816 void Brick::write(const std::string& filename) const
817 {
818 throw RipleyException("write: not supported");
819 }
820
821 void Brick::dump(const string& fileName) const
822 {
823 #if USE_SILO
824 string fn(fileName);
825 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
826 fn+=".silo";
827 }
828
829 int driver=DB_HDF5;
830 string siloPath;
831 DBfile* dbfile = NULL;
832
833 #ifdef ESYS_MPI
834 PMPIO_baton_t* baton = NULL;
835 const int NUM_SILO_FILES = 1;
836 const char* blockDirFmt = "/block%04d";
837 #endif
838
839 if (m_mpiInfo->size > 1) {
840 #ifdef ESYS_MPI
841 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
842 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
843 PMPIO_DefaultClose, (void*)&driver);
844 // try the fallback driver in case of error
845 if (!baton && driver != DB_PDB) {
846 driver = DB_PDB;
847 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
848 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
849 PMPIO_DefaultClose, (void*)&driver);
850 }
851 if (baton) {
852 char str[64];
853 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
854 siloPath = str;
855 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
856 }
857 #endif
858 } else {
859 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
860 getDescription().c_str(), driver);
861 // try the fallback driver in case of error
862 if (!dbfile && driver != DB_PDB) {
863 driver = DB_PDB;
864 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
865 getDescription().c_str(), driver);
866 }
867 }
868
869 if (!dbfile)
870 throw RipleyException("dump: Could not create Silo file");
871
872 /*
873 if (driver==DB_HDF5) {
874 // gzip level 1 already provides good compression with minimal
875 // performance penalty. Some tests showed that gzip levels >3 performed
876 // rather badly on escript data both in terms of time and space
877 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
878 }
879 */
880
881 boost::scoped_ptr<double> x(new double[m_NN[0]]);
882 boost::scoped_ptr<double> y(new double[m_NN[1]]);
883 boost::scoped_ptr<double> z(new double[m_NN[2]]);
884 double* coords[3] = { x.get(), y.get(), z.get() };
885 const dim_t NN0 = m_NN[0];
886 const dim_t NN1 = m_NN[1];
887 const dim_t NN2 = m_NN[2];
888
889 #pragma omp parallel
890 {
891 #pragma omp for
892 for (dim_t i0 = 0; i0 < NN0; i0++) {
893 coords[0][i0]=getLocalCoordinate(i0, 0);
894 }
895 #pragma omp for
896 for (dim_t i1 = 0; i1 < NN1; i1++) {
897 coords[1][i1]=getLocalCoordinate(i1, 1);
898 }
899 #pragma omp for
900 for (dim_t i2 = 0; i2 < NN2; i2++) {
901 coords[2][i2]=getLocalCoordinate(i2, 2);
902 }
903 }
904 // converting to int!
905 vector<int> dims(m_NN, m_NN+3);
906
907 // write mesh
908 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
909 DB_COLLINEAR, NULL);
910
911 // write node ids
912 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
913 NULL, 0, DB_INT, DB_NODECENT, NULL);
914
915 // write element ids
916 dims.assign(m_NE, m_NE+3);
917 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
918 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
919
920 // rank 0 writes multimesh and multivar
921 if (m_mpiInfo->rank == 0) {
922 vector<string> tempstrings;
923 vector<char*> names;
924 for (dim_t i=0; i<m_mpiInfo->size; i++) {
925 std::stringstream path;
926 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
927 tempstrings.push_back(path.str());
928 names.push_back((char*)tempstrings.back().c_str());
929 }
930 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
931 DBSetDir(dbfile, "/");
932 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
933 &types[0], NULL);
934 tempstrings.clear();
935 names.clear();
936 for (dim_t i=0; i<m_mpiInfo->size; i++) {
937 std::stringstream path;
938 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
939 tempstrings.push_back(path.str());
940 names.push_back((char*)tempstrings.back().c_str());
941 }
942 types.assign(m_mpiInfo->size, DB_QUADVAR);
943 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
944 &types[0], NULL);
945 tempstrings.clear();
946 names.clear();
947 for (dim_t i=0; i<m_mpiInfo->size; i++) {
948 std::stringstream path;
949 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
950 tempstrings.push_back(path.str());
951 names.push_back((char*)tempstrings.back().c_str());
952 }
953 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
954 &types[0], NULL);
955 }
956
957 if (m_mpiInfo->size > 1) {
958 #ifdef ESYS_MPI
959 PMPIO_HandOffBaton(baton, dbfile);
960 PMPIO_Finish(baton);
961 #endif
962 } else {
963 DBClose(dbfile);
964 }
965
966 #else // USE_SILO
967 throw RipleyException("dump: no Silo support");
968 #endif
969 }
970
971 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
972 {
973 switch (fsType) {
974 case Nodes:
975 case ReducedNodes: //FIXME: reduced
976 return &m_nodeId[0];
977 case DegreesOfFreedom:
978 case ReducedDegreesOfFreedom: //FIXME: reduced
979 return &m_dofId[0];
980 case Elements:
981 case ReducedElements:
982 return &m_elementId[0];
983 case FaceElements:
984 case ReducedFaceElements:
985 return &m_faceId[0];
986 case Points:
987 return &m_diracPointNodeIDs[0];
988 default:
989 break;
990 }
991
992 std::stringstream msg;
993 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
994 throw ValueError(msg.str());
995 }
996
997 bool Brick::ownSample(int fsType, index_t id) const
998 {
999 if (getMPISize()==1)
1000 return true;
1001
1002 switch (fsType) {
1003 case Nodes:
1004 case ReducedNodes: //FIXME: reduced
1005 return (m_dofMap[id] < getNumDOF());
1006 case DegreesOfFreedom:
1007 case ReducedDegreesOfFreedom:
1008 return true;
1009 case Elements:
1010 case ReducedElements:
1011 {
1012 // check ownership of element's _last_ node
1013 const index_t x=id%m_NE[0] + 1;
1014 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
1015 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
1016 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
1017 }
1018 case FaceElements:
1019 case ReducedFaceElements:
1020 {
1021 // check ownership of face element's last node
1022 dim_t n=0;
1023 for (size_t i=0; i<6; i++) {
1024 n+=m_faceCount[i];
1025 if (id<n) {
1026 const index_t j=id-n+m_faceCount[i];
1027 if (i>=4) { // front or back
1028 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
1029 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
1030 } else if (i>=2) { // bottom or top
1031 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1032 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1033 } else { // left or right
1034 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1035 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1036 }
1037 }
1038 }
1039 return false;
1040 }
1041 default:
1042 break;
1043 }
1044
1045 std::stringstream msg;
1046 msg << "ownSample: invalid function space type " << fsType;
1047 throw ValueError(msg.str());
1048 }
1049
1050 RankVector Brick::getOwnerVector(int fsType) const
1051 {
1052 RankVector owner;
1053 const int rank = m_mpiInfo->rank;
1054
1055 if (fsType == Elements || fsType == ReducedElements) {
1056 owner.assign(getNumElements(), rank);
1057 // shared plane in Y-Z
1058 if (m_faceCount[0]==0) {
1059 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1060 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1061 const dim_t e=k2*m_NE[0]*m_NE[1]+k1*m_NE[0];
1062 owner[e] = rank-1;
1063 }
1064 }
1065 }
1066 // shared plane in X-Z
1067 if (m_faceCount[2]==0) {
1068 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1069 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1070 const dim_t e=k2*m_NE[0]*m_NE[1]+k0;
1071 owner[e] = rank-m_NX[0];
1072 }
1073 }
1074 }
1075 // shared plane in X-Y
1076 if (m_faceCount[4]==0) {
1077 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1078 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1079 const dim_t e=k1*m_NE[0]+k0;
1080 owner[e] = rank-m_NX[0]*m_NX[1];
1081 }
1082 }
1083 }
1084
1085 } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
1086 owner.assign(getNumFaceElements(), rank);
1087 index_t offset=0;
1088 if (m_faceCount[0] > 0) {
1089 if (m_faceCount[2]==0) {
1090 for (dim_t k2=0; k2<m_NE[2]; k2++)
1091 owner[k2*m_NE[1]] = rank-m_NX[0];
1092 }
1093 if (m_faceCount[4]==0) {
1094 for (dim_t k1=0; k1<m_NE[1]; k1++)
1095 owner[k1] = rank-m_NX[0]*m_NX[1];
1096 }
1097 offset += m_faceCount[0];
1098 }
1099 if (m_faceCount[1] > 0) {
1100 if (m_faceCount[2]==0) {
1101 for (dim_t k2=0; k2<m_NE[2]; k2++)
1102 owner[offset+k2*m_NE[1]] = rank-m_NX[0];
1103 }
1104 if (m_faceCount[4]==0) {
1105 for (dim_t k1=0; k1<m_NE[1]; k1++)
1106 owner[offset+k1] = rank-m_NX[0]*m_NX[1];
1107 }
1108 offset += m_faceCount[1];
1109 }
1110 if (m_faceCount[2] > 0) {
1111 if (m_faceCount[0]==0) {
1112 for (dim_t k2=0; k2<m_NE[2]; k2++)
1113 owner[offset+k2*m_NE[0]] = rank-1;
1114 }
1115 if (m_faceCount[4]==0) {
1116 for (dim_t k0=0; k0<m_NE[0]; k0++)
1117 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1118 }
1119 offset += m_faceCount[2];
1120 }
1121 if (m_faceCount[3] > 0) {
1122 if (m_faceCount[0]==0) {
1123 for (dim_t k2=0; k2<m_NE[2]; k2++)
1124 owner[offset+k2*m_NE[0]] = rank-1;
1125 }
1126 if (m_faceCount[4]==0) {
1127 for (dim_t k0=0; k0<m_NE[0]; k0++)
1128 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1129 }
1130 offset += m_faceCount[3];
1131 }
1132 if (m_faceCount[4] > 0) {
1133 if (m_faceCount[0]==0) {
1134 for (dim_t k1=0; k1<m_NE[1]; k1++)
1135 owner[offset+k1*m_NE[0]] = rank-1;
1136 }
1137 if (m_faceCount[2]==0) {
1138 for (dim_t k0=0; k0<m_NE[0]; k0++)
1139 owner[offset+k0] = rank-m_NX[0];
1140 }
1141 offset += m_faceCount[4];
1142 }
1143 if (m_faceCount[5] > 0) {
1144 if (m_faceCount[0]==0) {
1145 for (dim_t k1=0; k1<m_NE[1]; k1++)
1146 owner[offset+k1*m_NE[0]] = rank-1;
1147 }
1148 if (m_faceCount[2]==0) {
1149 for (dim_t k0=0; k0<m_NE[0]; k0++)
1150 owner[offset+k0] = rank-m_NX[0];
1151 }
1152 }
1153 } else {
1154 throw ValueError("getOwnerVector: only valid for element types");
1155 }
1156
1157 return owner;
1158 }
1159
1160 void Brick::setToNormal(escript::Data& out) const
1161 {
1162 const dim_t NE0 = m_NE[0];
1163 const dim_t NE1 = m_NE[1];
1164 const dim_t NE2 = m_NE[2];
1165
1166 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1167 out.requireWrite();
1168 #pragma omp parallel
1169 {
1170 if (m_faceOffset[0] > -1) {
1171 #pragma omp for nowait
1172 for (index_t k2 = 0; k2 < NE2; ++k2) {
1173 for (index_t k1 = 0; k1 < NE1; ++k1) {
1174 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1175 // set vector at four quadrature points
1176 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1177 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1178 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1179 *o++ = -1.; *o++ = 0.; *o = 0.;
1180 }
1181 }
1182 }
1183
1184 if (m_faceOffset[1] > -1) {
1185 #pragma omp for nowait
1186 for (index_t k2 = 0; k2 < NE2; ++k2) {
1187 for (index_t k1 = 0; k1 < NE1; ++k1) {
1188 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1189 // set vector at four quadrature points
1190 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1191 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1192 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1193 *o++ = 1.; *o++ = 0.; *o = 0.;
1194 }
1195 }
1196 }
1197
1198 if (m_faceOffset[2] > -1) {
1199 #pragma omp for nowait
1200 for (index_t k2 = 0; k2 < NE2; ++k2) {
1201 for (index_t k0 = 0; k0 < NE0; ++k0) {
1202 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1203 // set vector at four quadrature points
1204 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1205 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1206 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1207 *o++ = 0.; *o++ = -1.; *o = 0.;
1208 }
1209 }
1210 }
1211
1212 if (m_faceOffset[3] > -1) {
1213 #pragma omp for nowait
1214 for (index_t k2 = 0; k2 < NE2; ++k2) {
1215 for (index_t k0 = 0; k0 < NE0; ++k0) {
1216 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1217 // set vector at four quadrature points
1218 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1219 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1220 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1221 *o++ = 0.; *o++ = 1.; *o = 0.;
1222 }
1223 }
1224 }
1225
1226 if (m_faceOffset[4] > -1) {
1227 #pragma omp for nowait
1228 for (index_t k1 = 0; k1 < NE1; ++k1) {
1229 for (index_t k0 = 0; k0 < NE0; ++k0) {
1230 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1231 // set vector at four quadrature points
1232 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1233 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1234 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1235 *o++ = 0.; *o++ = 0.; *o = -1.;
1236 }
1237 }
1238 }
1239
1240 if (m_faceOffset[5] > -1) {
1241 #pragma omp for nowait
1242 for (index_t k1 = 0; k1 < NE1; ++k1) {
1243 for (index_t k0 = 0; k0 < NE0; ++k0) {
1244 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1245 // set vector at four quadrature points
1246 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1247 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1248 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1249 *o++ = 0.; *o++ = 0.; *o = 1.;
1250 }
1251 }
1252 }
1253 } // end of parallel section
1254 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1255 out.requireWrite();
1256 #pragma omp parallel
1257 {
1258 if (m_faceOffset[0] > -1) {
1259 #pragma omp for nowait
1260 for (index_t k2 = 0; k2 < NE2; ++k2) {
1261 for (index_t k1 = 0; k1 < NE1; ++k1) {
1262 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1263 *o++ = -1.;
1264 *o++ = 0.;
1265 *o = 0.;
1266 }
1267 }
1268 }
1269
1270 if (m_faceOffset[1] > -1) {
1271 #pragma omp for nowait
1272 for (index_t k2 = 0; k2 < NE2; ++k2) {
1273 for (index_t k1 = 0; k1 < NE1; ++k1) {
1274 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1275 *o++ = 1.;
1276 *o++ = 0.;
1277 *o = 0.;
1278 }
1279 }
1280 }
1281
1282 if (m_faceOffset[2] > -1) {
1283 #pragma omp for nowait
1284 for (index_t k2 = 0; k2 < NE2; ++k2) {
1285 for (index_t k0 = 0; k0 < NE0; ++k0) {
1286 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1287 *o++ = 0.;
1288 *o++ = -1.;
1289 *o = 0.;
1290 }
1291 }
1292 }
1293
1294 if (m_faceOffset[3] > -1) {
1295 #pragma omp for nowait
1296 for (index_t k2 = 0; k2 < NE2; ++k2) {
1297 for (index_t k0 = 0; k0 < NE0; ++k0) {
1298 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1299 *o++ = 0.;
1300 *o++ = 1.;
1301 *o = 0.;
1302 }
1303 }
1304 }
1305
1306 if (m_faceOffset[4] > -1) {
1307 #pragma omp for nowait
1308 for (index_t k1 = 0; k1 < NE1; ++k1) {
1309 for (index_t k0 = 0; k0 < NE0; ++k0) {
1310 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1311 *o++ = 0.;
1312 *o++ = 0.;
1313 *o = -1.;
1314 }
1315 }
1316 }
1317
1318 if (m_faceOffset[5] > -1) {
1319 #pragma omp for nowait
1320 for (index_t k1 = 0; k1 < NE1; ++k1) {
1321 for (index_t k0 = 0; k0 < NE0; ++k0) {
1322 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1323 *o++ = 0.;
1324 *o++ = 0.;
1325 *o = 1.;
1326 }
1327 }
1328 }
1329 } // end of parallel section
1330
1331 } else {
1332 std::stringstream msg;
1333 msg << "setToNormal: invalid function space type "
1334 << out.getFunctionSpace().getTypeCode();
1335 throw ValueError(msg.str());
1336 }
1337 }
1338
1339 void Brick::setToSize(escript::Data& out) const
1340 {
1341 if (out.getFunctionSpace().getTypeCode() == Elements
1342 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1343 out.requireWrite();
1344 const dim_t numQuad=out.getNumDataPointsPerSample();
1345 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1346 const dim_t NE = getNumElements();
1347 #pragma omp parallel for
1348 for (index_t k = 0; k < NE; ++k) {
1349 double* o = out.getSampleDataRW(k);
1350 fill(o, o+numQuad, size);
1351 }
1352 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1353 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1354 out.requireWrite();
1355 const dim_t numQuad=out.getNumDataPointsPerSample();
1356 const dim_t NE0 = m_NE[0];
1357 const dim_t NE1 = m_NE[1];
1358 const dim_t NE2 = m_NE[2];
1359 #pragma omp parallel
1360 {
1361 if (m_faceOffset[0] > -1) {
1362 const double size=min(m_dx[1],m_dx[2]);
1363 #pragma omp for nowait
1364 for (index_t k2 = 0; k2 < NE2; ++k2) {
1365 for (index_t k1 = 0; k1 < NE1; ++k1) {
1366 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1367 fill(o, o+numQuad, size);
1368 }
1369 }
1370 }
1371
1372 if (m_faceOffset[1] > -1) {
1373 const double size=min(m_dx[1],m_dx[2]);
1374 #pragma omp for nowait
1375 for (index_t k2 = 0; k2 < NE2; ++k2) {
1376 for (index_t k1 = 0; k1 < NE1; ++k1) {
1377 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1378 fill(o, o+numQuad, size);
1379 }
1380 }
1381 }
1382
1383 if (m_faceOffset[2] > -1) {
1384 const double size=min(m_dx[0],m_dx[2]);
1385 #pragma omp for nowait
1386 for (index_t k2 = 0; k2 < NE2; ++k2) {
1387 for (index_t k0 = 0; k0 < NE0; ++k0) {
1388 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1389 fill(o, o+numQuad, size);
1390 }
1391 }
1392 }
1393
1394 if (m_faceOffset[3] > -1) {
1395 const double size=min(m_dx[0],m_dx[2]);
1396 #pragma omp for nowait
1397 for (index_t k2 = 0; k2 < NE2; ++k2) {
1398 for (index_t k0 = 0; k0 < NE0; ++k0) {
1399 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1400 fill(o, o+numQuad, size);
1401 }
1402 }
1403 }
1404
1405 if (m_faceOffset[4] > -1) {
1406 const double size=min(m_dx[0],m_dx[1]);
1407 #pragma omp for nowait
1408 for (index_t k1 = 0; k1 < NE1; ++k1) {
1409 for (index_t k0 = 0; k0 < NE0; ++k0) {
1410 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1411 fill(o, o+numQuad, size);
1412 }
1413 }
1414 }
1415
1416 if (m_faceOffset[5] > -1) {
1417 const double size=min(m_dx[0],m_dx[1]);
1418 #pragma omp for nowait
1419 for (index_t k1 = 0; k1 < NE1; ++k1) {
1420 for (index_t k0 = 0; k0 < NE0; ++k0) {
1421 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1422 fill(o, o+numQuad, size);
1423 }
1424 }
1425 }
1426 } // end of parallel section
1427
1428 } else {
1429 std::stringstream msg;
1430 msg << "setToSize: invalid function space type "
1431 << out.getFunctionSpace().getTypeCode();
1432 throw ValueError(msg.str());
1433 }
1434 }
1435
1436 void Brick::Print_Mesh_Info(const bool full) const
1437 {
1438 RipleyDomain::Print_Mesh_Info(full);
1439 if (full) {
1440 std::cout << " Id Coordinates" << std::endl;
1441 std::cout.precision(15);
1442 std::cout.setf(ios::scientific, std::ios::floatfield);
1443 for (index_t i=0; i < getNumNodes(); i++) {
1444 std::cout << " " << std::setw(5) << m_nodeId[i]
1445 << " " << getLocalCoordinate(i%m_NN[0], 0)
1446 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1447 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << std::endl;
1448 }
1449 }
1450 }
1451
1452
1453 //protected
1454 void Brick::assembleCoordinates(escript::Data& arg) const
1455 {
1456 int numDim = m_numDim;
1457 if (!arg.isDataPointShapeEqual(1, &numDim))
1458 throw ValueError("setToX: Invalid Data object shape");
1459 if (!arg.numSamplesEqual(1, getNumNodes()))
1460 throw ValueError("setToX: Illegal number of samples in Data object");
1461
1462 const dim_t NN0 = m_NN[0];
1463 const dim_t NN1 = m_NN[1];
1464 const dim_t NN2 = m_NN[2];
1465 arg.requireWrite();
1466 #pragma omp parallel for
1467 for (dim_t i2 = 0; i2 < NN2; i2++) {
1468 for (dim_t i1 = 0; i1 < NN1; i1++) {
1469 for (dim_t i0 = 0; i0 < NN0; i0++) {
1470 double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1471 point[0] = getLocalCoordinate(i0, 0);
1472 point[1] = getLocalCoordinate(i1, 1);
1473 point[2] = getLocalCoordinate(i2, 2);
1474 }
1475 }
1476 }
1477 }
1478
1479 //protected
1480 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1481 {
1482 const dim_t numComp = in.getDataPointSize();
1483 const double C0 = .044658198738520451079;
1484 const double C1 = .16666666666666666667;
1485 const double C2 = .21132486540518711775;
1486 const double C3 = .25;
1487 const double C4 = .5;
1488 const double C5 = .62200846792814621559;
1489 const double C6 = .78867513459481288225;
1490 const dim_t NE0 = m_NE[0];
1491 const dim_t NE1 = m_NE[1];
1492 const dim_t NE2 = m_NE[2];
1493
1494 if (out.getFunctionSpace().getTypeCode() == Elements) {
1495 out.requireWrite();
1496 #pragma omp parallel
1497 {
1498 vector<double> f_000(numComp);
1499 vector<double> f_001(numComp);
1500 vector<double> f_010(numComp);
1501 vector<double> f_011(numComp);
1502 vector<double> f_100(numComp);
1503 vector<double> f_101(numComp);
1504 vector<double> f_110(numComp);
1505 vector<double> f_111(numComp);
1506 #pragma omp for
1507 for (index_t k2=0; k2 < NE2; ++k2) {
1508 for (index_t k1=0; k1 < NE1; ++k1) {
1509 for (index_t k0=0; k0 < NE0; ++k0) {
1510 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1517 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1519 for (index_t i=0; i < numComp; ++i) {
1520 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1521 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1522 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1523 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1524 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1525 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1526 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1527 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1528 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1529 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1530 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1531 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1532 o[INDEX3(i,0,0,numComp,3)] = V0;
1533 o[INDEX3(i,1,0,numComp,3)] = V4;
1534 o[INDEX3(i,2,0,numComp,3)] = V8;
1535 o[INDEX3(i,0,1,numComp,3)] = V0;
1536 o[INDEX3(i,1,1,numComp,3)] = V5;
1537 o[INDEX3(i,2,1,numComp,3)] = V9;
1538 o[INDEX3(i,0,2,numComp,3)] = V1;
1539 o[INDEX3(i,1,2,numComp,3)] = V4;
1540 o[INDEX3(i,2,2,numComp,3)] = V10;
1541 o[INDEX3(i,0,3,numComp,3)] = V1;
1542 o[INDEX3(i,1,3,numComp,3)] = V5;
1543 o[INDEX3(i,2,3,numComp,3)] = V11;
1544 o[INDEX3(i,0,4,numComp,3)] = V2;
1545 o[INDEX3(i,1,4,numComp,3)] = V6;
1546 o[INDEX3(i,2,4,numComp,3)] = V8;
1547 o[INDEX3(i,0,5,numComp,3)] = V2;
1548 o[INDEX3(i,1,5,numComp,3)] = V7;
1549 o[INDEX3(i,2,5,numComp,3)] = V9;
1550 o[INDEX3(i,0,6,numComp,3)] = V3;
1551 o[INDEX3(i,1,6,numComp,3)] = V6;
1552 o[INDEX3(i,2,6,numComp,3)] = V10;
1553 o[INDEX3(i,0,7,numComp,3)] = V3;
1554 o[INDEX3(i,1,7,numComp,3)] = V7;
1555 o[INDEX3(i,2,7,numComp,3)] = V11;
1556 } // end of component loop i
1557 } // end of k0 loop
1558 } // end of k1 loop
1559 } // end of k2 loop
1560 } // end of parallel section
1561 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1562 out.requireWrite();
1563 #pragma omp parallel
1564 {
1565 vector<double> f_000(numComp);
1566 vector<double> f_001(numComp);
1567 vector<double> f_010(numComp);
1568 vector<double> f_011(numComp);
1569 vector<double> f_100(numComp);
1570 vector<double> f_101(numComp);
1571 vector<double> f_110(numComp);
1572 vector<double> f_111(numComp);
1573 #pragma omp for
1574 for (index_t k2=0; k2 < NE2; ++k2) {
1575 for (index_t k1=0; k1 < NE1; ++k1) {
1576 for (index_t k0=0; k0 < NE0; ++k0) {
1577 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1583 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1584 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1585 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1586 for (index_t i=0; i < numComp; ++i) {
1587 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1588 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1589 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1590 } // end of component loop i
1591 } // end of k0 loop
1592 } // end of k1 loop
1593 } // end of k2 loop
1594 } // end of parallel section
1595 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1596 out.requireWrite();
1597 #pragma omp parallel
1598 {
1599 vector<double> f_000(numComp);
1600 vector<double> f_001(numComp);
1601 vector<double> f_010(numComp);
1602 vector<double> f_011(numComp);
1603 vector<double> f_100(numComp);
1604 vector<double> f_101(numComp);
1605 vector<double> f_110(numComp);
1606 vector<double> f_111(numComp);
1607 if (m_faceOffset[0] > -1) {
1608 #pragma omp for nowait
1609 for (index_t k2=0; k2 < NE2; ++k2) {
1610 for (index_t k1=0; k1 < NE1; ++k1) {
1611 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1620 for (index_t i=0; i < numComp; ++i) {
1621 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1622 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1623 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1624 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1625 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1626 o[INDEX3(i,1,0,numComp,3)] = V0;
1627 o[INDEX3(i,2,0,numComp,3)] = V2;
1628 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1629 o[INDEX3(i,1,1,numComp,3)] = V0;
1630 o[INDEX3(i,2,1,numComp,3)] = V3;
1631 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1632 o[INDEX3(i,1,2,numComp,3)] = V1;
1633 o[INDEX3(i,2,2,numComp,3)] = V2;
1634 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1635 o[INDEX3(i,1,3,numComp,3)] = V1;
1636 o[INDEX3(i,2,3,numComp,3)] = V3;
1637 } // end of component loop i
1638 } // end of k1 loop
1639 } // end of k2 loop
1640 } // end of face 0
1641 if (m_faceOffset[1] > -1) {
1642 #pragma omp for nowait
1643 for (index_t k2=0; k2 < NE2; ++k2) {
1644 for (index_t k1=0; k1 < NE1; ++k1) {
1645 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1646 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1647 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1648 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1649 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1650 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1651 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1652 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1653 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1654 for (index_t i=0; i < numComp; ++i) {
1655 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1656 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1657 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1658 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1659 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1660 o[INDEX3(i,1,0,numComp,3)] = V0;
1661 o[INDEX3(i,2,0,numComp,3)] = V2;
1662 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1663 o[INDEX3(i,1,1,numComp,3)] = V0;
1664 o[INDEX3(i,2,1,numComp,3)] = V3;
1665 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1666 o[INDEX3(i,1,2,numComp,3)] = V1;
1667 o[INDEX3(i,2,2,numComp,3)] = V2;
1668 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1669 o[INDEX3(i,1,3,numComp,3)] = V1;
1670 o[INDEX3(i,2,3,numComp,3)] = V3;
1671 } // end of component loop i
1672 } // end of k1 loop
1673 } // end of k2 loop
1674 } // end of face 1
1675 if (m_faceOffset[2] > -1) {
1676 #pragma omp for nowait
1677 for (index_t k2=0; k2 < NE2; ++k2) {
1678 for (index_t k0=0; k0 < NE0; ++k0) {
1679 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1680 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1681 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1682 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1683 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1684 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1685 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1686 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1687 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1688 for (index_t i=0; i < numComp; ++i) {
1689 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1690 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1691 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1692 o[INDEX3(i,0,0,numComp,3)] = V0;
1693 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1694 o[INDEX3(i,2,0,numComp,3)] = V1;
1695 o[INDEX3(i,0,1,numComp,3)] = V0;
1696 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1697 o[INDEX3(i,2,1,numComp,3)] = V2;
1698 o[INDEX3(i,0,2,numComp,3)] = V0;
1699 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1700 o[INDEX3(i,2,2,numComp,3)] = V1;
1701 o[INDEX3(i,0,3,numComp,3)] = V0;
1702 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1703 o[INDEX3(i,2,3,numComp,3)] = V2;
1704 } // end of component loop i
1705 } // end of k0 loop
1706 } // end of k2 loop
1707 } // end of face 2
1708 if (m_faceOffset[3] > -1) {
1709 #pragma omp for nowait
1710 for (index_t k2=0; k2 < NE2; ++k2) {
1711 for (index_t k0=0; k0 < NE0; ++k0) {
1712 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1713 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1714 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1715 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1716 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1717 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1718 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1721 for (index_t i=0; i < numComp; ++i) {
1722 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1723 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1724 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1725 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1726 o[INDEX3(i,0,0,numComp,3)] = V0;
1727 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1728 o[INDEX3(i,2,0,numComp,3)] = V2;
1729 o[INDEX3(i,0,1,numComp,3)] = V0;
1730 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1731 o[INDEX3(i,2,1,numComp,3)] = V3;
1732 o[INDEX3(i,0,2,numComp,3)] = V1;
1733 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1734 o[INDEX3(i,2,2,numComp,3)] = V2;
1735 o[INDEX3(i,0,3,numComp,3)] = V1;
1736 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1737 o[INDEX3(i,2,3,numComp,3)] = V3;
1738 } // end of component loop i
1739 } // end of k0 loop
1740 } // end of k2 loop
1741 } // end of face 3
1742 if (m_faceOffset[4] > -1) {
1743 #pragma omp for nowait
1744 for (index_t k1=0; k1 < NE1; ++k1) {
1745 for (index_t k0=0; k0 < NE0; ++k0) {
1746 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1747 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1748 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1749 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1750 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1751 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1752 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1753 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1754 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1755 for (index_t i=0; i < numComp; ++i) {
1756 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1757 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1758 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1759 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1760 o[INDEX3(i,0,0,numComp,3)] = V0;
1761 o[INDEX3(i,1,0,numComp,3)] = V2;
1762 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1763 o[INDEX3(i,0,1,numComp,3)] = V0;
1764 o[INDEX3(i,1,1,numComp,3)] = V3;
1765 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1766 o[INDEX3(i,0,2,numComp,3)] = V1;
1767 o[INDEX3(i,1,2,numComp,3)] = V2;
1768 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1769 o[INDEX3(i,0,3,numComp,3)] = V1;
1770 o[INDEX3(i,1,3,numComp,3)] = V3;
1771 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1772 } // end of component loop i
1773 } // end of k0 loop
1774 } // end of k1 loop
1775 } // end of face 4
1776 if (m_faceOffset[5] > -1) {
1777 #pragma omp for nowait
1778 for (index_t k1=0; k1 < NE1; ++k1) {
1779 for (index_t k0=0; k0 < NE0; ++k0) {
1780 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1781 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1782 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1783 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1784 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1785 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1786 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1787 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1788 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0));
1789 for (index_t i=0; i < numComp; ++i) {
1790 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1791 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1792 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1793 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1794 o[INDEX3(i,0,0,numComp,3)] = V0;
1795 o[INDEX3(i,1,0,numComp,3)] = V2;
1796 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1797 o[INDEX3(i,0,1,numComp,3)] = V0;
1798 o[INDEX3(i,1,1,numComp,3)] = V3;
1799 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1800 o[INDEX3(i,0,2,numComp,3)] = V1;
1801 o[INDEX3(i,1,2,numComp,3)] = V2;
1802 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1803 o[INDEX3(i,0,3,numComp,3)] = V1;
1804 o[INDEX3(i,1,3,numComp,3)] = V3;
1805 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1806 } // end of component loop i
1807 } // end of k0 loop
1808 } // end of k1 loop
1809 } // end of face 5
1810 } // end of parallel section
1811 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1812 out.requireWrite();
1813 #pragma omp parallel
1814 {
1815 vector<double> f_000(numComp);
1816 vector<double> f_001(numComp);
1817 vector<double> f_010(numComp);
1818 vector<double> f_011(numComp);
1819 vector<double> f_100(numComp);
1820 vector<double> f_101(numComp);
1821 vector<double> f_110(numComp);
1822 vector<double> f_111(numComp);
1823 if (m_faceOffset[0] > -1) {
1824 #pragma omp for nowait
1825 for (index_t k2=0; k2 < NE2; ++k2) {
1826 for (index_t k1=0; k1 < NE1; ++k1) {
1827 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1828 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1829 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1830 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1831 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1832 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1833 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1834 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1835 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1836 for (index_t i=0; i < numComp; ++i) {
1837 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1838 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1839 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1840 } // end of component loop i
1841 } // end of k1 loop
1842 } // end of k2 loop
1843 } // end of face 0
1844 if (m_faceOffset[1] > -1) {
1845 #pragma omp for nowait
1846 for (index_t k2=0; k2 < NE2; ++k2) {
1847 for (index_t k1=0; k1 < NE1; ++k1) {
1848 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1849 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1850 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1851 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1852 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1853 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1854 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1855 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1856 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1857 for (index_t i=0; i < numComp; ++i) {
1858 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1859 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1860 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1861 } // end of component loop i
1862 } // end of k1 loop
1863 } // end of k2 loop
1864 } // end of face 1
1865 if (m_faceOffset[2] > -1) {
1866 #pragma omp for nowait
1867 for (index_t k2=0; k2 < NE2; ++k2) {
1868 for (index_t k0=0; k0 < NE0; ++k0) {
1869 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1870 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1871 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1872 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1873 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1874 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1875 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1876 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1877 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1878 for (index_t i=0; i < numComp; ++i) {
1879 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1880 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1881 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1882 } // end of component loop i
1883 } // end of k0 loop
1884 } // end of k2 loop
1885 } // end of face 2
1886 if (m_faceOffset[3] > -1) {
1887 #pragma omp for nowait
1888 for (index_t k2=0; k2 < NE2; ++k2) {
1889 for (index_t k0=0; k0 < NE0; ++k0) {
1890 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1891 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1892 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1893 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1894 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1895 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1896 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1897 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1898 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1899 for (index_t i=0; i < numComp; ++i) {
1900 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1901 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 /