/[escript]/trunk/ripley/src/Brick.cpp
Revision 6651 - Wed Feb 7 02:12:08 2018 UTC by jfenwick
File size: 188494 byte(s)

Make everyone sad by touching all the files

Copyright dates update

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2018 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <ripley/DefaultAssembler3D.h>
19 #include <ripley/LameAssembler3D.h>
20 #include <ripley/WaveAssembler3D.h>
21 #include <ripley/blocktools.h>
22 #include <ripley/domainhelpers.h>
23
24 #include <escript/FileWriter.h>
25 #include <escript/index.h>
26 #include <escript/Random.h>
27
28 #ifdef ESYS_HAVE_PASO
29 #include <paso/SystemMatrix.h>
30 #endif
31
32 #ifdef ESYS_HAVE_NETCDF
33 #ifdef NETCDF4
34 #include <ncVar.h>
35 #include <ncDim.h>
36 #include <escript/NCHelper.h>
37
38 #else
39 #include <netcdfcpp.h>
40 #endif
41 #endif
42
43
44
45
46 #ifdef ESYS_HAVE_SILO
47 #include <silo.h>
48 #ifdef ESYS_MPI
49 #include <pmpio.h>
50 #endif
51 #endif
52
53 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
54 #include <boost/scoped_array.hpp>
55
56 #include <iomanip>
57 #include <limits>
58
59 namespace bp = boost::python;
60 namespace bm = boost::math;
61 using escript::AbstractSystemMatrix;
62 using escript::FileWriter;
63 using escript::NotImplementedError;
64 using escript::ValueError;
65 using std::vector;
66 using std::string;
67 using std::min;
68 using std::max;
69 using std::ios;
70 using std::fill;
71
72 namespace ripley {
73
74 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
75 {
76 if (a > b) {
77 if (c > a) {
78 return 2;
79 }
80 return 0;
81 } else if (b > c) {
82 return 1;
83 }
84 return 2;
85 }
86
87 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
88 double x1, double y1, double z1, int d0, int d1, int d2,
89 const vector<double>& points, const vector<int>& tags,
90 const TagMap& tagnamestonums,
91 escript::SubWorld_ptr w) :
92 RipleyDomain(3, w)
93 {
94 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
95 * static_cast<long>(n2 + 1) > std::numeric_limits<dim_t>::max())
96 throw RipleyException("The number of elements has overflowed, this "
97 "limit may be raised in future releases.");
98
99 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
100 throw ValueError("Number of elements in each spatial dimension "
101 "must be positive");
102
103 // ignore subdivision parameters for serial run
104 if (m_mpiInfo->size == 1) {
105 d0=1;
106 d1=1;
107 d2=1;
108 }
109 bool warn=false;
110
111 vector<int> factors;
112 int ranks = m_mpiInfo->size;
113 dim_t epr[3] = {n0,n1,n2};
114 int d[3] = {d0,d1,d2};
115 if (d0<=0 || d1<=0 || d2<=0) {
116 for (int i = 0; i < 3; i++) {
117 if (d[i] < 1) {
118 d[i] = 1;
119 continue;
120 }
121 epr[i] = -1; // can no longer be max
122 if (ranks % d[i] != 0) {
123 throw ValueError("Invalid number of spatial subdivisions");
124 }
125 // remove this explicitly requested factor from the remaining rank count
126 ranks /= d[i];
127 }
128 factorise(factors, ranks);
129 if (factors.size() != 0) {
130 warn = true;
131 }
132 }
133 while (factors.size() > 0) {
134 int i = indexOfMax(epr[0],epr[1],epr[2]);
135 int f = factors.back();
136 factors.pop_back();
137 d[i] *= f;
138 epr[i] /= f;
139 }
140 d0 = d[0]; d1 = d[1]; d2 = d[2];
141
142 // ensure number of subdivisions is valid and nodes can be distributed
143 // among number of ranks
144 if (d0*d1*d2 != m_mpiInfo->size){
145 throw ValueError("Invalid number of spatial subdivisions");
146 }
147 if (warn) {
148 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
149 << d1 << ", d2=" << d2 << "). This may not be optimal!" << std::endl;
150 }
151
152 double l0 = x1-x0;
153 double l1 = y1-y0;
154 double l2 = z1-z0;
155 m_dx[0] = l0/n0;
156 m_dx[1] = l1/n1;
157 m_dx[2] = l2/n2;
158
159 warn = false;
160 if ((n0+1)%d0 > 0) {
161 switch (getDecompositionPolicy()) {
162 case DECOMP_EXPAND:
163 l0 = m_dx[0]*n0; // fall through
164 case DECOMP_ADD_ELEMENTS:
165 n0 = (dim_t)round((float)(n0+1)/d0+0.5)*d0-1; // fall through
166 case DECOMP_STRICT:
167 warn = true;
168 break;
169 }
170 // reset spacing
171 m_dx[0] = l0/n0;
172 }
173 if ((n1+1)%d1 > 0) {
174 switch (getDecompositionPolicy()) {
175 case DECOMP_EXPAND:
176 l1 = m_dx[1]*n1; // fall through
177 case DECOMP_ADD_ELEMENTS:
178 n1 = (dim_t)round((float)(n1+1)/d1+0.5)*d1-1; // fall through
179 case DECOMP_STRICT:
180 warn = true;
181 break;
182 }
183 // reset spacing
184 m_dx[1] = l1/n1;
185 }
186 if ((n2+1)%d2 > 0) {
187 switch (getDecompositionPolicy()) {
188 case DECOMP_EXPAND:
189 l2 = m_dx[2]*n2; // fall through
190 case DECOMP_ADD_ELEMENTS:
191 n2 = (dim_t)round((float)(n2+1)/d2+0.5)*d2-1; // fall through
192 case DECOMP_STRICT:
193 warn = true;
194 break;
195 }
196 // reset spacing
197 m_dx[2] = l2/n2;
198 }
199
200 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
201 throw ValueError("Too few elements for the number of ranks");
202
203 if (warn) {
204 if (getDecompositionPolicy() == DECOMP_STRICT) {
205 throw ValueError("Unable to decompose domain to the number of "
206 "MPI ranks without adding elements and the policy "
207 "is set to STRICT. Use setDecompositionPolicy() "
208 "to allow adding elements.");
209 } else {
210 std::cout << "Warning: Domain setup has been adjusted as follows "
211 "to allow decomposition into " << m_mpiInfo->size
212 << " MPI ranks:" << std::endl
213 << " N0=" << n0 << ", l0=" << l0 << std::endl
214 << " N1=" << n1 << ", l1=" << l1 << std::endl
215 << " N2=" << n2 << ", l2=" << l1 << std::endl;
216 }
217 }
218 m_gNE[0] = n0;
219 m_gNE[1] = n1;
220 m_gNE[2] = n2;
221 m_origin[0] = x0;
222 m_origin[1] = y0;
223 m_origin[2] = z0;
224 m_length[0] = l0;
225 m_length[1] = l1;
226 m_length[2] = l2;
227 m_NX[0] = d0;
228 m_NX[1] = d1;
229 m_NX[2] = d2;
230
231 // local number of elements (including overlap)
232 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
233 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
234 m_NE[0]++;
235 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
236 m_ownNE[0]--;
237
238 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
239 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
240 m_NE[1]++;
241 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
242 m_ownNE[1]--;
243
244 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
245 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
246 m_NE[2]++;
247 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
248 m_ownNE[2]--;
249
250 // local number of nodes
251 m_NN[0] = m_NE[0]+1;
252 m_NN[1] = m_NE[1]+1;
253 m_NN[2] = m_NE[2]+1;
254
255 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
256 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
257 if (m_offset[0] > 0)
258 m_offset[0]--;
259 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
260 if (m_offset[1] > 0)
261 m_offset[1]--;
262 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
263 if (m_offset[2] > 0)
264 m_offset[2]--;
265
266 populateSampleIds();
267
268 for (TagMap::const_iterator i = tagnamestonums.begin();
269 i != tagnamestonums.end(); i++) {
270 setTagMap(i->first, i->second);
271 }
272 addPoints(points, tags);
273 }
274
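// Illustrative sketch: for the fully automatic case (no subdivision
// requested), the constructor above chooses the per-axis rank counts
// d0,d1,d2 by factorising the number of MPI ranks and assigning each factor,
// largest first, to the axis that currently has the most elements per rank
// (via indexOfMax). A minimal standalone version of that strategy, assuming
// factorise() from domainhelpers fills the vector with the prime factors of
// its second argument (as the constructor's call suggests); the name
// exampleSplitRanks is hypothetical, not escript API:
inline void exampleSplitRanks(dim_t n0, dim_t n1, dim_t n2, int ranks, int d[3])
{
    dim_t epr[3] = { n0, n1, n2 };      // elements per rank along each axis
    d[0] = d[1] = d[2] = 1;
    vector<int> factors;
    factorise(factors, ranks);          // e.g. 8 -> {2, 2, 2}
    while (!factors.empty()) {
        const int axis = indexOfMax(epr[0], epr[1], epr[2]);
        const int f = factors.back();   // largest remaining factor
        factors.pop_back();
        d[axis] *= f;                   // more ranks along the longest axis
        epr[axis] /= f;                 // fewer elements per rank there
    }
}
// For example, 8 ranks on a 100x20x20 grid end up as d = {8, 1, 1}: every
// factor of 2 goes to the x-axis because that keeps the most elements per rank.
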
275 Brick::~Brick()
276 {
277 }
278
279 string Brick::getDescription() const
280 {
281 return "ripley::Brick";
282 }
283
284 bool Brick::operator==(const escript::AbstractDomain& other) const
285 {
286 const Brick* o=dynamic_cast<const Brick*>(&other);
287 if (o) {
288 return (RipleyDomain::operator==(other) &&
289 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
290 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
291 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
292 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
293 }
294
295 return false;
296 }
297
298 #ifdef NETCDF4
299
300 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
301 const ReaderParameters& params) const
302 {
303 #ifdef ESYS_HAVE_NETCDF
304 using namespace netCDF;
305 // check destination function space
306 dim_t myN0, myN1, myN2;
307 if (out.getFunctionSpace().getTypeCode() == Nodes) {
308 myN0 = m_NN[0];
309 myN1 = m_NN[1];
310 myN2 = m_NN[2];
311 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
312 out.getFunctionSpace().getTypeCode() == ReducedElements) {
313 myN0 = m_NE[0];
314 myN1 = m_NE[1];
315 myN2 = m_NE[2];
316 } else
317 throw ValueError("readNcGrid(): invalid function space for output data object");
318
319 if (params.first.size() != 3)
320 throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
321
322 if (params.numValues.size() != 3)
323 throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
324
325 if (params.multiplier.size() != 3)
326 throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
327 for (size_t i=0; i<params.multiplier.size(); i++)
328 if (params.multiplier[i]<1)
329 throw ValueError("readNcGrid(): all multipliers must be positive");
330
331 // check file existence and size
332
333 NcFile f;
334 if (!escript::openNcFile(f, filename))
335 {
336 throw RipleyException("readNcGrid(): cannot open file");
337 }
338 NcVar var = f.getVar(varname.c_str());
339 if (var.isNull())
340 throw RipleyException("readNcGrid(): invalid variable name");
341
342 // TODO: rank>0 data support
343 const int numComp = out.getDataPointSize();
344 if (numComp > 1)
345 throw RipleyException("readNcGrid(): only scalar data supported");
346
347 const int dims = var.getDimCount();
348 vector<long> edges(dims);
349 std::vector< NcDim > vard=var.getDims();
350 for (size_t i=0;i<vard.size();++i)
351 {
352 edges[i]=vard[i].getSize();
353 }
354
355 // is this a slice of the data object (dims!=3)?
356 // note the expected ordering of edges (as in numpy: z,y,x)
357 if ( (dims==3 && (params.numValues[2] > edges[0] ||
358 params.numValues[1] > edges[1] ||
359 params.numValues[0] > edges[2]))
360 || (dims==2 && params.numValues[2]>1)
361 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
362 throw RipleyException("readNcGrid(): not enough data in file");
363 }
364
365 // check if this rank contributes anything
366 if (params.first[0] >= m_offset[0]+myN0 ||
367 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
368 params.first[1] >= m_offset[1]+myN1 ||
369 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
370 params.first[2] >= m_offset[2]+myN2 ||
371 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
372 return;
373 }
374
375 // now determine how much this rank has to write
376
377 // first coordinates in data object to write to
378 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
379 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
380 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
381 // indices to first value in file (not accounting for reverse yet)
382 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
383 dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
384 dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
385 // number of values to read
386 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
387 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
388 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
389
390 // make sure we read the right block if going backwards through file
391 if (params.reverse[0])
392 idx0 = edges[dims-1]-num0-idx0;
393 if (dims>1 && params.reverse[1])
394 idx1 = edges[dims-2]-num1-idx1;
395 if (dims>2 && params.reverse[2])
396 idx2 = edges[dims-3]-num2-idx2;
397
398
399 vector<double> values(num0*num1*num2);
400 vector<size_t> startindex;
401 vector<size_t> counts;
402 if (dims==3) {
403 //var->set_cur(idx2, idx1, idx0); // from old API
404 startindex.push_back(idx2);
405 startindex.push_back(idx1);
406 startindex.push_back(idx0);
407 counts.push_back(num2);
408 counts.push_back(num1);
409 counts.push_back(num0);
410 var.getVar(startindex, counts, &values[0]);
411 } else if (dims==2) {
412 // var->set_cur(idx1, idx0); // from old API
413 startindex.push_back(idx1);
414 startindex.push_back(idx0);
415 counts.push_back(num1);
416 counts.push_back(num0);
417 var.getVar(startindex, counts, &values[0]);
418 } else {
419 //var->set_cur(idx0);
420 //var->get(&values[0], num0);
421 startindex.push_back(idx0);
422 counts.push_back(num0);
423 var.getVar(startindex, counts, &values[0]);
424 }
425
426 const int dpp = out.getNumDataPointsPerSample();
427 out.requireWrite();
428
429 // helpers for reversing
430 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
431 const int x_mult = (params.reverse[0] ? -1 : 1);
432 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
433 const int y_mult = (params.reverse[1] ? -1 : 1);
434 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
435 const int z_mult = (params.reverse[2] ? -1 : 1);
436
437 for (index_t z=0; z<num2; z++) {
438 for (index_t y=0; y<num1; y++) {
439 #pragma omp parallel for
440 for (index_t x=0; x<num0; x++) {
441 const dim_t baseIndex = first0+x*params.multiplier[0]
442 +(first1+y*params.multiplier[1])*myN0
443 +(first2+z*params.multiplier[2])*myN0*myN1;
444 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
445 +(y0+y_mult*y)*num0
446 +(x0+x_mult*x);
447 if (!bm::isnan(values[srcIndex])) {
448 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
449 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
450 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
451 const dim_t dataIndex = baseIndex+m0
452 +m1*myN0
453 +m2*myN0*myN1;
454 double* dest = out.getSampleDataRW(dataIndex);
455 for (index_t q=0; q<dpp; q++) {
456 *dest++ = values[srcIndex];
457 }
458 }
459 }
460 }
461 }
462 }
463 }
464 }
465 #else
466 throw RipleyException("readNcGrid(): not compiled with netCDF support");
467 #endif
468 }
469
470
471 #else
472
473 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
474 const ReaderParameters& params) const
475 {
476 #ifdef ESYS_HAVE_NETCDF
477 // check destination function space
478 dim_t myN0, myN1, myN2;
479 if (out.getFunctionSpace().getTypeCode() == Nodes) {
480 myN0 = m_NN[0];
481 myN1 = m_NN[1];
482 myN2 = m_NN[2];
483 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
484 out.getFunctionSpace().getTypeCode() == ReducedElements) {
485 myN0 = m_NE[0];
486 myN1 = m_NE[1];
487 myN2 = m_NE[2];
488 } else
489 throw ValueError("readNcGrid(): invalid function space for output data object");
490
491 if (params.first.size() != 3)
492 throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
493
494 if (params.numValues.size() != 3)
495 throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
496
497 if (params.multiplier.size() != 3)
498 throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
499 for (size_t i=0; i<params.multiplier.size(); i++)
500 if (params.multiplier[i]<1)
501 throw ValueError("readNcGrid(): all multipliers must be positive");
502
503 // check file existence and size
504 NcFile f(filename.c_str(), NcFile::ReadOnly);
505 if (!f.is_valid())
506 throw RipleyException("readNcGrid(): cannot open file");
507
508 NcVar* var = f.get_var(varname.c_str());
509 if (!var)
510 throw RipleyException("readNcGrid(): invalid variable name");
511
512 // TODO: rank>0 data support
513 const int numComp = out.getDataPointSize();
514 if (numComp > 1)
515 throw RipleyException("readNcGrid(): only scalar data supported");
516
517 const int dims = var->num_dims();
518 boost::scoped_array<long> edges(var->edges());
519
520 // is this a slice of the data object (dims!=3)?
521 // note the expected ordering of edges (as in numpy: z,y,x)
522 if ( (dims==3 && (params.numValues[2] > edges[0] ||
523 params.numValues[1] > edges[1] ||
524 params.numValues[0] > edges[2]))
525 || (dims==2 && params.numValues[2]>1)
526 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
527 throw RipleyException("readNcGrid(): not enough data in file");
528 }
529
530 // check if this rank contributes anything
531 if (params.first[0] >= m_offset[0]+myN0 ||
532 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
533 params.first[1] >= m_offset[1]+myN1 ||
534 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
535 params.first[2] >= m_offset[2]+myN2 ||
536 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
537 return;
538 }
539
540 // now determine how much this rank has to write
541
542 // first coordinates in data object to write to
543 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
544 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
545 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
546 // indices to first value in file (not accounting for reverse yet)
547 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
548 dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
549 dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
550 // number of values to read
551 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
552 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
553 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
554
555 // make sure we read the right block if going backwards through file
556 if (params.reverse[0])
557 idx0 = edges[dims-1]-num0-idx0;
558 if (dims>1 && params.reverse[1])
559 idx1 = edges[dims-2]-num1-idx1;
560 if (dims>2 && params.reverse[2])
561 idx2 = edges[dims-3]-num2-idx2;
562
563
564 vector<double> values(num0*num1*num2);
565 if (dims==3) {
566 var->set_cur(idx2, idx1, idx0);
567 var->get(&values[0], num2, num1, num0);
568 } else if (dims==2) {
569 var->set_cur(idx1, idx0);
570 var->get(&values[0], num1, num0);
571 } else {
572 var->set_cur(idx0);
573 var->get(&values[0], num0);
574 }
575
576 const int dpp = out.getNumDataPointsPerSample();
577 out.requireWrite();
578
579 // helpers for reversing
580 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
581 const int x_mult = (params.reverse[0] ? -1 : 1);
582 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
583 const int y_mult = (params.reverse[1] ? -1 : 1);
584 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
585 const int z_mult = (params.reverse[2] ? -1 : 1);
586
587 for (index_t z=0; z<num2; z++) {
588 for (index_t y=0; y<num1; y++) {
589 #pragma omp parallel for
590 for (index_t x=0; x<num0; x++) {
591 const dim_t baseIndex = first0+x*params.multiplier[0]
592 +(first1+y*params.multiplier[1])*myN0
593 +(first2+z*params.multiplier[2])*myN0*myN1;
594 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
595 +(y0+y_mult*y)*num0
596 +(x0+x_mult*x);
597 if (!bm::isnan(values[srcIndex])) {
598 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
599 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
600 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
601 const dim_t dataIndex = baseIndex+m0
602 +m1*myN0
603 +m2*myN0*myN1;
604 double* dest = out.getSampleDataRW(dataIndex);
605 for (index_t q=0; q<dpp; q++) {
606 *dest++ = values[srcIndex];
607 }
608 }
609 }
610 }
611 }
612 }
613 }
614 }
615 #else
616 throw RipleyException("readNcGrid(): not compiled with netCDF support");
617 #endif
618 }
619
620 #endif
621
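// Illustrative sketch: both readNcGrid() variants above treat the netCDF
// variable as stored in numpy order (z,y,x), so edges[0] is the z-extent and
// edges[dims-1] the x-extent. When an axis is flagged as reversed, the start
// index is mirrored inside the file extent so that the same block of values
// is read; reversedStart is a hypothetical helper, not escript API:
inline dim_t reversedStart(dim_t extent, dim_t num, dim_t idx)
{
    // reading num values forwards from idx corresponds to starting at
    // extent-num-idx when the axis is traversed backwards
    return extent - num - idx;
}
// Example: an x-extent of 100 values, of which this rank needs num0=20
// starting at idx0=30, becomes a start index of 100-20-30 = 50 when the grid
// is flipped along x.
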
622 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
623 const ReaderParameters& params) const
624 {
625 #ifdef ESYS_HAVE_BOOST_IO
626 // the mapping is not universally correct but should work on our
627 // supported platforms
628 switch (params.dataType) {
629 case DATATYPE_INT32:
630 readBinaryGridZippedImpl<int>(out, filename, params);
631 break;
632 case DATATYPE_FLOAT32:
633 readBinaryGridZippedImpl<float>(out, filename, params);
634 break;
635 case DATATYPE_FLOAT64:
636 readBinaryGridZippedImpl<double>(out, filename, params);
637 break;
638 default:
639 throw ValueError("readBinaryGridZipped(): invalid or unsupported datatype");
640 }
641 #else
642 throw RipleyException("readBinaryGridZipped(): not compiled with zip support");
643 #endif
644 }
645
646 void Brick::readBinaryGrid(escript::Data& out, string filename,
647 const ReaderParameters& params) const
648 {
649 // the mapping is not universally correct but should work on our
650 // supported platforms
651 switch (params.dataType) {
652 case DATATYPE_INT32:
653 readBinaryGridImpl<int>(out, filename, params);
654 break;
655 case DATATYPE_FLOAT32:
656 readBinaryGridImpl<float>(out, filename, params);
657 break;
658 case DATATYPE_FLOAT64:
659 readBinaryGridImpl<double>(out, filename, params);
660 break;
661 default:
662 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
663 }
664 }
665
666 template<typename ValueType>
667 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
668 const ReaderParameters& params) const
669 {
670 // check destination function space
671 dim_t myN0, myN1, myN2;
672 if (out.getFunctionSpace().getTypeCode() == Nodes) {
673 myN0 = m_NN[0];
674 myN1 = m_NN[1];
675 myN2 = m_NN[2];
676 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
677 out.getFunctionSpace().getTypeCode() == ReducedElements) {
678 myN0 = m_NE[0];
679 myN1 = m_NE[1];
680 myN2 = m_NE[2];
681 } else
682 throw ValueError("readBinaryGrid(): invalid function space for output data object");
683
684 if (params.first.size() != 3)
685 throw ValueError("readBinaryGrid(): argument 'first' must have 3 entries");
686
687 if (params.numValues.size() != 3)
688 throw ValueError("readBinaryGrid(): argument 'numValues' must have 3 entries");
689
690 if (params.multiplier.size() != 3)
691 throw ValueError("readBinaryGrid(): argument 'multiplier' must have 3 entries");
692 for (size_t i=0; i<params.multiplier.size(); i++)
693 if (params.multiplier[i]<1)
694 throw ValueError("readBinaryGrid(): all multipliers must be positive");
695 if (params.reverse[0] != 0 || params.reverse[1] != 0)
696 throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
697
698 // check file existence and size
699 std::ifstream f(filename.c_str(), std::ifstream::binary);
700 if (f.fail()) {
701 throw RipleyException("readBinaryGrid(): cannot open file " + filename);
702 }
703 f.seekg(0, std::ios::end);
704 const int numComp = out.getDataPointSize();
705 const dim_t filesize = f.tellg();
706 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
707 if (filesize < reqsize) {
708 f.close();
709 throw RipleyException("readBinaryGrid(): not enough data in file");
710 }
711
712 // check if this rank contributes anything
713 if (params.first[0] >= m_offset[0]+myN0 ||
714 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
715 params.first[1] >= m_offset[1]+myN1 ||
716 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
717 params.first[2] >= m_offset[2]+myN2 ||
718 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
719 f.close();
720 return;
721 }
722
723 // now determine how much this rank has to write
724
725 // first coordinates in data object to write to
726 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
727 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
728 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
729 // indices to first value in file (not accounting for reverse yet)
730 dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
731 dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
732 dim_t idx2 = max(dim_t(0), (m_offset[2]/params.multiplier[2])-params.first[2]);
733 // if restX > 0 the first value in the respective dimension has been
734 // written restX times already in a previous rank so this rank only
735 // contributes (multiplier-restX) copies of that value
736 const dim_t rest0 = m_offset[0]%params.multiplier[0];
737 const dim_t rest1 = m_offset[1]%params.multiplier[1];
738 const dim_t rest2 = m_offset[2]%params.multiplier[2];
739
740 // number of values to read
741 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
742 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
743 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
744
745 // make sure we read the right block if going backwards through file
746 if (params.reverse[2])
747 idx2 = params.numValues[2]-idx2-1;
748
749 // helpers for reversing
750 const int z_mult = (params.reverse[2] ? -1 : 1);
751
752 out.requireWrite();
753 vector<ValueType> values(num0*numComp);
754 const int dpp = out.getNumDataPointsPerSample();
755
756 for (dim_t z=0; z<num2; z++) {
757 const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
758 dim_t dataZbase = first2 + z*params.multiplier[2];
759 if (z>0)
760 dataZbase -= rest2;
761
762 for (dim_t y=0; y<num1; y++) {
763 const dim_t fileofs = numComp*(idx0 +
764 (idx1+y)*params.numValues[0] +
765 (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
766 f.seekg(fileofs*sizeof(ValueType));
767 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
768 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
769 dim_t dataYbase = first1 + y*params.multiplier[1];
770 if (y>0)
771 dataYbase -= rest1;
772
773 for (dim_t x=0; x<num0; x++) {
774 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
775 dim_t dataXbase = first0 + x*params.multiplier[0];
776 if (x>0)
777 dataXbase -= rest0;
778 // write a block of mult0 x mult1 x mult2 identical values into
779 // Data object
780 for (dim_t m2=0; m2 < m2limit; m2++) {
781 const dim_t dataZ = dataZbase + m2;
782 if (dataZ >= myN2)
783 break;
784 for (dim_t m1=0; m1 < m1limit; m1++) {
785 const dim_t dataY = dataYbase + m1;
786 if (dataY >= myN1)
787 break;
788 for (dim_t m0=0; m0 < m0limit; m0++) {
789 const dim_t dataX = dataXbase + m0;
790 if (dataX >= myN0)
791 break;
792 const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
793 double* dest = out.getSampleDataRW(dataIndex);
794 for (int c=0; c<numComp; c++) {
795 ValueType val = values[x*numComp+c];
796
797 if (params.byteOrder != BYTEORDER_NATIVE) {
798 char* cval = reinterpret_cast<char*>(&val);
799 // this will alter val!!
800 if (sizeof(ValueType)>4) {
801 byte_swap64(cval);
802 } else {
803 byte_swap32(cval);
804 }
805 }
806 if (!bm::isnan(val)) {
807 for (int q=0; q<dpp; q++) {
808 *dest++ = static_cast<double>(val);
809 }
810 }
811 }
812 }
813 }
814 }
815 }
816 }
817 }
818
819 f.close();
820 }
821
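// Illustrative sketch: when the file's byte order differs from the machine's,
// readBinaryGridImpl() above reverses the bytes of each value in place via
// ripley's byte_swap32()/byte_swap64(). A generic stand-in with the same
// effect (byteReverse is a hypothetical name) simply mirrors the byte array:
template<typename T>
inline void byteReverse(T& value)
{
    char* p = reinterpret_cast<char*>(&value);
    for (size_t i = 0, j = sizeof(T) - 1; i < j; ++i, --j) {
        const char tmp = p[i];  // swap the outermost remaining pair
        p[i] = p[j];
        p[j] = tmp;
    }
}
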
822 #ifdef ESYS_HAVE_BOOST_IO
823 template<typename ValueType>
824 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
825 const ReaderParameters& params) const
826 {
827 // check destination function space
828 dim_t myN0, myN1, myN2;
829 if (out.getFunctionSpace().getTypeCode() == Nodes) {
830 myN0 = m_NN[0];
831 myN1 = m_NN[1];
832 myN2 = m_NN[2];
833 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
834 out.getFunctionSpace().getTypeCode() == ReducedElements) {
835 myN0 = m_NE[0];
836 myN1 = m_NE[1];
837 myN2 = m_NE[2];
838 } else
839 throw ValueError("readBinaryGridFromZipped(): invalid function space for output data object");
840
841 if (params.first.size() != 3)
842 throw ValueError("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
843
844 if (params.numValues.size() != 3)
845 throw ValueError("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
846
847 if (params.multiplier.size() != 3)
848 throw ValueError("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
849 for (size_t i=0; i<params.multiplier.size(); i++)
850 if (params.multiplier[i]<1)
851 throw ValueError("readBinaryGridFromZipped(): all multipliers must be positive");
852
853 // check file existence and size
854 std::ifstream f(filename.c_str(), std::ifstream::binary);
855 if (f.fail()) {
856 throw RipleyException("readBinaryGridFromZipped(): cannot open file " + filename);
857 }
858 f.seekg(0, std::ios::end);
859 const int numComp = out.getDataPointSize();
860 dim_t filesize = f.tellg();
861 f.seekg(0, ios::beg);
862 vector<char> compressed(filesize);
863 f.read((char*)&compressed[0], filesize);
864 f.close();
865 vector<char> decompressed = unzip(compressed);
866 filesize = decompressed.size();
867 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
868 if (filesize < reqsize) {
869 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
870 }
871
872 // check if this rank contributes anything
873 if (params.first[0] >= m_offset[0]+myN0 ||
874 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
875 params.first[1] >= m_offset[1]+myN1 ||
876 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
877 params.first[2] >= m_offset[2]+myN2 ||
878 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
879 return;
880 }
881
882 // now determine how much this rank has to write
883
884 // first coordinates in data object to write to
885 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
886 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
887 const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
888 // indices to first value in file
889 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
890 const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
891 const dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
892 // number of values to read
893 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
894 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
895 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
896
897 out.requireWrite();
898 vector<ValueType> values(num0*numComp);
899 const int dpp = out.getNumDataPointsPerSample();
900
901 for (dim_t z=0; z<num2; z++) {
902 for (dim_t y=0; y<num1; y++) {
903 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
904 +(idx2+z)*params.numValues[0]*params.numValues[1]);
905 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
906
907 for (dim_t x=0; x<num0; x++) {
908 const dim_t baseIndex = first0+x*params.multiplier[0]
909 +(first1+y*params.multiplier[1])*myN0
910 +(first2+z*params.multiplier[2])*myN0*myN1;
911 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
912 for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
913 for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
914 const dim_t dataIndex = baseIndex+m0
915 +m1*myN0
916 +m2*myN0*myN1;
917 double* dest = out.getSampleDataRW(dataIndex);
918 for (int c=0; c<numComp; c++) {
919 ValueType val = values[x*numComp+c];
920
921 if (params.byteOrder != BYTEORDER_NATIVE) {
922 char* cval = reinterpret_cast<char*>(&val);
923 // this will alter val!!
924 byte_swap32(cval);
925 }
926 if (!bm::isnan(val)) {
927 for (int q=0; q<dpp; q++) {
928 *dest++ = static_cast<double>(val);
929 }
930 }
931 }
932 }
933 }
934 }
935 }
936 }
937 }
938 }
939 #endif
940
941 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
942 int byteOrder, int dataType) const
943 {
944 // the mapping is not universally correct but should work on our
945 // supported platforms
946 switch (dataType) {
947 case DATATYPE_INT32:
948 writeBinaryGridImpl<int>(in, filename, byteOrder);
949 break;
950 case DATATYPE_FLOAT32:
951 writeBinaryGridImpl<float>(in, filename, byteOrder);
952 break;
953 case DATATYPE_FLOAT64:
954 writeBinaryGridImpl<double>(in, filename, byteOrder);
955 break;
956 default:
957 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
958 }
959 }
960
961 template<typename ValueType>
962 void Brick::writeBinaryGridImpl(const escript::Data& in,
963 const string& filename, int byteOrder) const
964 {
965 // check function space and determine number of points
966 dim_t myN0, myN1, myN2;
967 dim_t totalN0, totalN1, totalN2;
968 dim_t offset0, offset1, offset2;
969 if (in.getFunctionSpace().getTypeCode() == Nodes) {
970 myN0 = m_NN[0];
971 myN1 = m_NN[1];
972 myN2 = m_NN[2];
973 totalN0 = m_gNE[0]+1;
974 totalN1 = m_gNE[1]+1;
975 totalN2 = m_gNE[2]+1;
976 offset0 = m_offset[0];
977 offset1 = m_offset[1];
978 offset2 = m_offset[2];
979 } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
980 in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
981 myN0 = (m_gNE[0]+1)/m_NX[0];
982 myN1 = (m_gNE[1]+1)/m_NX[1];
983 myN2 = (m_gNE[2]+1)/m_NX[2];
984 totalN0 = m_gNE[0]+1;
985 totalN1 = m_gNE[1]+1;
986 totalN2 = m_gNE[2]+1;
987 offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
988 offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
989 offset2 = (m_offset[2]>0 ? m_offset[2]+1 : 0);
990 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
991 in.getFunctionSpace().getTypeCode() == ReducedElements) {
992 myN0 = m_NE[0];
993 myN1 = m_NE[1];
994 myN2 = m_NE[2];
995 totalN0 = m_gNE[0];
996 totalN1 = m_gNE[1];
997 totalN2 = m_gNE[2];
998 offset0 = m_offset[0];
999 offset1 = m_offset[1];
1000 offset2 = m_offset[2];
1001 } else
1002 throw RipleyException("writeBinaryGrid(): unsupported function space");
1003
1004 const int numComp = in.getDataPointSize();
1005 const int dpp = in.getNumDataPointsPerSample();
1006 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
1007
1008 if (numComp > 1 || dpp > 1)
1009 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
1010
1011 // from here on we know that each sample consists of one value
1012 FileWriter fw(m_mpiInfo->comm);
1013 fw.openFile(filename, fileSize, true);
1014 MPIBarrier();
1015
1016 for (index_t z=0; z<myN2; z++) {
1017 for (index_t y=0; y<myN1; y++) {
1018 const dim_t fileofs = (offset0+(offset1+y)*totalN0
1019 +(offset2+z)*totalN0*totalN1)*sizeof(ValueType);
1020 std::ostringstream oss;
1021
1022 for (index_t x=0; x<myN0; x++) {
1023 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
1024 ValueType fvalue = static_cast<ValueType>(*sample);
1025 if (byteOrder == BYTEORDER_NATIVE) {
1026 oss.write((char*)&fvalue, sizeof(fvalue));
1027 } else {
1028 char* value = reinterpret_cast<char*>(&fvalue);
1029 if (sizeof(fvalue)>4) {
1030 byte_swap64(value);
1031 } else {
1032 byte_swap32(value);
1033 }
1034 oss.write(value, sizeof(fvalue));
1035 }
1036 }
1037 fw.writeAt(oss, fileofs);
1038 }
1039 }
1040 fw.close();
1041 }
1042
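// Illustrative sketch: writeBinaryGridImpl() above lays the global grid out
// x-fastest, so each rank can seek directly to the position of every (y,z)
// row it owns. rowStartIndex is a hypothetical helper showing the offset
// arithmetic used for fw.writeAt():
inline dim_t rowStartIndex(dim_t offset0, dim_t offset1, dim_t offset2,
                           dim_t y, dim_t z, dim_t totalN0, dim_t totalN1)
{
    // index of the first value of the row; the byte offset is this value
    // multiplied by sizeof(ValueType)
    return offset0 + (offset1 + y)*totalN0 + (offset2 + z)*totalN0*totalN1;
}
// Example: with totalN0 = totalN1 = 100, a rank whose block starts at
// (50,0,0) writes its y=2, z=1 row starting at value index
// 50 + 2*100 + 1*100*100 = 10250, i.e. byte 41000 for 4-byte floats.
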
1043 void Brick::write(const std::string& filename) const
1044 {
1045 throw RipleyException("write: not supported");
1046 }
1047
1048 void Brick::dump(const string& fileName) const
1049 {
1050 #ifdef ESYS_HAVE_SILO
1051 string fn(fileName);
1052 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
1053 fn+=".silo";
1054 }
1055
1056 int driver=DB_HDF5;
1057 string siloPath;
1058 DBfile* dbfile = NULL;
1059
1060 #ifdef ESYS_MPI
1061 PMPIO_baton_t* baton = NULL;
1062 const int NUM_SILO_FILES = 1;
1063 const char* blockDirFmt = "/block%04d";
1064 #endif
1065
1066 if (m_mpiInfo->size > 1) {
1067 #ifdef ESYS_MPI
1068 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
1069 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
1070 PMPIO_DefaultClose, (void*)&driver);
1071 // try the fallback driver in case of error
1072 if (!baton && driver != DB_PDB) {
1073 driver = DB_PDB;
1074 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
1075 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
1076 PMPIO_DefaultClose, (void*)&driver);
1077 }
1078 if (baton) {
1079 char str[64];
1080 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
1081 siloPath = str;
1082 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
1083 }
1084 #endif
1085 } else {
1086 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
1087 getDescription().c_str(), driver);
1088 // try the fallback driver in case of error
1089 if (!dbfile && driver != DB_PDB) {
1090 driver = DB_PDB;
1091 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
1092 getDescription().c_str(), driver);
1093 }
1094 }
1095
1096 if (!dbfile)
1097 throw RipleyException("dump: Could not create Silo file");
1098
1099 /*
1100 if (driver==DB_HDF5) {
1101 // gzip level 1 already provides good compression with minimal
1102 // performance penalty. Some tests showed that gzip levels >3 performed
1103 // rather badly on escript data both in terms of time and space
1104 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
1105 }
1106 */
1107
1108 boost::scoped_array<double> x(new double[m_NN[0]]);
1109 boost::scoped_array<double> y(new double[m_NN[1]]);
1110 boost::scoped_array<double> z(new double[m_NN[2]]);
1111 double* coords[3] = { x.get(), y.get(), z.get() };
1112 const dim_t NN0 = m_NN[0];
1113 const dim_t NN1 = m_NN[1];
1114 const dim_t NN2 = m_NN[2];
1115
1116 #pragma omp parallel
1117 {
1118 #pragma omp for
1119 for (dim_t i0 = 0; i0 < NN0; i0++) {
1120 coords[0][i0]=getLocalCoordinate(i0, 0);
1121 }
1122 #pragma omp for
1123 for (dim_t i1 = 0; i1 < NN1; i1++) {
1124 coords[1][i1]=getLocalCoordinate(i1, 1);
1125 }
1126 #pragma omp for
1127 for (dim_t i2 = 0; i2 < NN2; i2++) {
1128 coords[2][i2]=getLocalCoordinate(i2, 2);
1129 }
1130 }
1131 // converting to int!
1132 vector<int> dims(m_NN, m_NN+3);
1133
1134 // write mesh
1135 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
1136 DB_COLLINEAR, NULL);
1137
1138 // write node ids
1139 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
1140 NULL, 0, DB_INT, DB_NODECENT, NULL);
1141
1142 // write element ids
1143 dims.assign(m_NE, m_NE+3);
1144 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
1145 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
1146
1147 // rank 0 writes multimesh and multivar
1148 if (m_mpiInfo->rank == 0) {
1149 vector<string> tempstrings;
1150 vector<char*> names;
1151 for (dim_t i=0; i<m_mpiInfo->size; i++) {
1152 std::stringstream path;
1153 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
1154 tempstrings.push_back(path.str());
1155 names.push_back((char*)tempstrings.back().c_str());
1156 }
1157 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
1158 DBSetDir(dbfile, "/");
1159 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
1160 &types[0], NULL);
1161 tempstrings.clear();
1162 names.clear();
1163 for (dim_t i=0; i<m_mpiInfo->size; i++) {
1164 std::stringstream path;
1165 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
1166 tempstrings.push_back(path.str());
1167 names.push_back((char*)tempstrings.back().c_str());
1168 }
1169 types.assign(m_mpiInfo->size, DB_QUADVAR);
1170 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
1171 &types[0], NULL);
1172 tempstrings.clear();
1173 names.clear();
1174 for (dim_t i=0; i<m_mpiInfo->size; i++) {
1175 std::stringstream path;
1176 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
1177 tempstrings.push_back(path.str());
1178 names.push_back((char*)tempstrings.back().c_str());
1179 }
1180 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
1181 &types[0], NULL);
1182 }
1183
1184 if (m_mpiInfo->size > 1) {
1185 #ifdef ESYS_MPI
1186 PMPIO_HandOffBaton(baton, dbfile);
1187 PMPIO_Finish(baton);
1188 #endif
1189 } else {
1190 DBClose(dbfile);
1191 }
1192
1193 #else // ESYS_HAVE_SILO
1194 throw RipleyException("dump: no Silo support");
1195 #endif
1196 }
1197
1198 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
1199 {
1200 switch (fsType) {
1201 case Nodes:
1202 case ReducedNodes: //FIXME: reduced
1203 return &m_nodeId[0];
1204 case DegreesOfFreedom:
1205 case ReducedDegreesOfFreedom: //FIXME: reduced
1206 return &m_dofId[0];
1207 case Elements:
1208 case ReducedElements:
1209 return &m_elementId[0];
1210 case FaceElements:
1211 case ReducedFaceElements:
1212 return &m_faceId[0];
1213 case Points:
1214 return &m_diracPointNodeIDs[0];
1215 default:
1216 break;
1217 }
1218
1219 std::stringstream msg;
1220 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
1221 throw ValueError(msg.str());
1222 }
1223
1224 bool Brick::ownSample(int fsType, index_t id) const
1225 {
1226 if (getMPISize()==1)
1227 return true;
1228
1229 switch (fsType) {
1230 case Nodes:
1231 case ReducedNodes: //FIXME: reduced
1232 return (m_dofMap[id] < getNumDOF());
1233 case DegreesOfFreedom:
1234 case ReducedDegreesOfFreedom:
1235 return true;
1236 case Elements:
1237 case ReducedElements:
1238 {
1239 // check ownership of element's _last_ node
1240 const index_t x=id%m_NE[0] + 1;
1241 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
1242 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
1243 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
1244 }
1245 case FaceElements:
1246 case ReducedFaceElements:
1247 {
1248 // check ownership of face element's last node
1249 dim_t n=0;
1250 for (size_t i=0; i<6; i++) {
1251 n+=m_faceCount[i];
1252 if (id<n) {
1253 const index_t j=id-n+m_faceCount[i];
1254 if (i>=4) { // front or back
1255 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
1256 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
1257 } else if (i>=2) { // bottom or top
1258 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1259 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1260 } else { // left or right
1261 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1262 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1263 }
1264 }
1265 }
1266 return false;
1267 }
1268 default:
1269 break;
1270 }
1271
1272 std::stringstream msg;
1273 msg << "ownSample: invalid function space type " << fsType;
1274 throw ValueError(msg.str());
1275 }
1276
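// Illustrative sketch: element ids are laid out x-fastest, so ownSample()
// above recovers the grid position of an element with modulo/division and
// then checks the owner of the element's last node (the node with the
// largest local index). decomposeElementId is a hypothetical helper:
inline void decomposeElementId(index_t id, dim_t NE0, dim_t NE1,
                               index_t& x, index_t& y, index_t& z)
{
    x = id % NE0;            // fastest-running index
    y = (id / NE0) % NE1;
    z = id / (NE0 * NE1);
}
// Example: on a 4x3x2 local element grid, id=17 maps to (x,y,z)=(1,1,1);
// in the corresponding 5x4x3 node grid the element's last node is then
// (x+1) + (y+1)*5 + (z+1)*5*4 = 2 + 10 + 40 = 52.
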
1277 RankVector Brick::getOwnerVector(int fsType) const
1278 {
1279 RankVector owner;
1280 const int rank = m_mpiInfo->rank;
1281
1282 if (fsType == Elements || fsType == ReducedElements) {
1283 owner.assign(getNumElements(), rank);
1284 // shared plane in Y-Z
1285 if (m_faceCount[0]==0) {
1286 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1287 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1288 const dim_t e=k2*m_NE[0]*m_NE[1]+k1*m_NE[0];
1289 owner[e] = rank-1;
1290 }
1291 }
1292 }
1293 // shared plane in X-Z
1294 if (m_faceCount[2]==0) {
1295 for (dim_t k2=0; k2<m_NE[2]; k2++) {
1296 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1297 const dim_t e=k2*m_NE[0]*m_NE[1]+k0;
1298 owner[e] = rank-m_NX[0];
1299 }
1300 }
1301 }
1302 // shared plane in X-Y
1303 if (m_faceCount[4]==0) {
1304 for (dim_t k1=0; k1<m_NE[1]; k1++) {
1305 for (dim_t k0=0; k0<m_NE[0]; k0++) {
1306 const dim_t e=k1*m_NE[0]+k0;
1307 owner[e] = rank-m_NX[0]*m_NX[1];
1308 }
1309 }
1310 }
1311
1312 } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
1313 owner.assign(getNumFaceElements(), rank);
1314 index_t offset=0;
1315 if (m_faceCount[0] > 0) {
1316 if (m_faceCount[2]==0) {
1317 for (dim_t k2=0; k2<m_NE[2]; k2++)
1318 owner[k2*m_NE[1]] = rank-m_NX[0];
1319 }
1320 if (m_faceCount[4]==0) {
1321 for (dim_t k1=0; k1<m_NE[1]; k1++)
1322 owner[k1] = rank-m_NX[0]*m_NX[1];
1323 }
1324 offset += m_faceCount[0];
1325 }
1326 if (m_faceCount[1] > 0) {
1327 if (m_faceCount[2]==0) {
1328 for (dim_t k2=0; k2<m_NE[2]; k2++)
1329 owner[offset+k2*m_NE[1]] = rank-m_NX[0];
1330 }
1331 if (m_faceCount[4]==0) {
1332 for (dim_t k1=0; k1<m_NE[1]; k1++)
1333 owner[offset+k1] = rank-m_NX[0]*m_NX[1];
1334 }
1335 offset += m_faceCount[1];
1336 }
1337 if (m_faceCount[2] > 0) {
1338 if (m_faceCount[0]==0) {
1339 for (dim_t k2=0; k2<m_NE[2]; k2++)
1340 owner[offset+k2*m_NE[0]] = rank-1;
1341 }
1342 if (m_faceCount[4]==0) {
1343 for (dim_t k0=0; k0<m_NE[0]; k0++)
1344 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1345 }
1346 offset += m_faceCount[2];
1347 }
1348 if (m_faceCount[3] > 0) {
1349 if (m_faceCount[0]==0) {
1350 for (dim_t k2=0; k2<m_NE[2]; k2++)
1351 owner[offset+k2*m_NE[0]] = rank-1;
1352 }
1353 if (m_faceCount[4]==0) {
1354 for (dim_t k0=0; k0<m_NE[0]; k0++)
1355 owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1356 }
1357 offset += m_faceCount[3];
1358 }
1359 if (m_faceCount[4] > 0) {
1360 if (m_faceCount[0]==0) {
1361 for (dim_t k1=0; k1<m_NE[1]; k1++)
1362 owner[offset+k1*m_NE[0]] = rank-1;
1363 }
1364 if (m_faceCount[2]==0) {
1365 for (dim_t k0=0; k0<m_NE[0]; k0++)
1366 owner[offset+k0] = rank-m_NX[0];
1367 }
1368 offset += m_faceCount[4];
1369 }
1370 if (m_faceCount[5] > 0) {
1371 if (m_faceCount[0]==0) {
1372 for (dim_t k1=0; k1<m_NE[1]; k1++)
1373 owner[offset+k1*m_NE[0]] = rank-1;
1374 }
1375 if (m_faceCount[2]==0) {
1376 for (dim_t k0=0; k0<m_NE[0]; k0++)
1377 owner[offset+k0] = rank-m_NX[0];
1378 }
1379 }
1380 } else {
1381 throw ValueError("getOwnerVector: only valid for element types");
1382 }
1383
1384 return owner;
1385 }
1386
1387 void Brick::setToNormal(escript::Data& out) const
1388 {
1389 const dim_t NE0 = m_NE[0];
1390 const dim_t NE1 = m_NE[1];
1391 const dim_t NE2 = m_NE[2];
1392
1393 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1394 out.requireWrite();
1395 #pragma omp parallel
1396 {
1397 if (m_faceOffset[0] > -1) {
1398 #pragma omp for nowait
1399 for (index_t k2 = 0; k2 < NE2; ++k2) {
1400 for (index_t k1 = 0; k1 < NE1; ++k1) {
1401 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1402 // set vector at four quadrature points
1403 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1404 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1405 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1406 *o++ = -1.; *o++ = 0.; *o = 0.;
1407 }
1408 }
1409 }
1410
1411 if (m_faceOffset[1] > -1) {
1412 #pragma omp for nowait
1413 for (index_t k2 = 0; k2 < NE2; ++k2) {
1414 for (index_t k1 = 0; k1 < NE1; ++k1) {
1415 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1416 // set vector at four quadrature points
1417 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1418 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1419 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1420 *o++ = 1.; *o++ = 0.; *o = 0.;
1421 }
1422 }
1423 }
1424
1425 if (m_faceOffset[2] > -1) {
1426 #pragma omp for nowait
1427 for (index_t k2 = 0; k2 < NE2; ++k2) {
1428 for (index_t k0 = 0; k0 < NE0; ++k0) {
1429 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1430 // set vector at four quadrature points
1431 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1432 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1433 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1434 *o++ = 0.; *o++ = -1.; *o = 0.;
1435 }
1436 }
1437 }
1438
1439 if (m_faceOffset[3] > -1) {
1440 #pragma omp for nowait
1441 for (index_t k2 = 0; k2 < NE2; ++k2) {
1442 for (index_t k0 = 0; k0 < NE0; ++k0) {
1443 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1444 // set vector at four quadrature points
1445 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1446 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1447 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1448 *o++ = 0.; *o++ = 1.; *o = 0.;
1449 }
1450 }
1451 }
1452
1453 if (m_faceOffset[4] > -1) {
1454 #pragma omp for nowait
1455 for (index_t k1 = 0; k1 < NE1; ++k1) {
1456 for (index_t k0 = 0; k0 < NE0; ++k0) {
1457 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1458 // set vector at four quadrature points
1459 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1460 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1461 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1462 *o++ = 0.; *o++ = 0.; *o = -1.;
1463 }
1464 }
1465 }
1466
1467 if (m_faceOffset[5] > -1) {
1468 #pragma omp for nowait
1469 for (index_t k1 = 0; k1 < NE1; ++k1) {
1470 for (index_t k0 = 0; k0 < NE0; ++k0) {
1471 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1472 // set vector at four quadrature points
1473 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1474 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1475 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1476 *o++ = 0.; *o++ = 0.; *o = 1.;
1477 }
1478 }
1479 }
1480 } // end of parallel section
1481 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1482 out.requireWrite();
1483 #pragma omp parallel
1484 {
1485 if (m_faceOffset[0] > -1) {
1486 #pragma omp for nowait
1487 for (index_t k2 = 0; k2 < NE2; ++k2) {
1488 for (index_t k1 = 0; k1 < NE1; ++k1) {
1489 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1490 *o++ = -1.;
1491 *o++ = 0.;
1492 *o = 0.;
1493 }
1494 }
1495 }
1496
1497 if (m_faceOffset[1] > -1) {
1498 #pragma omp for nowait
1499 for (index_t k2 = 0; k2 < NE2; ++k2) {
1500 for (index_t k1 = 0; k1 < NE1; ++k1) {
1501 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1502 *o++ = 1.;
1503 *o++ = 0.;
1504 *o = 0.;
1505 }
1506 }
1507 }
1508
1509 if (m_faceOffset[2] > -1) {
1510 #pragma omp for nowait
1511 for (index_t k2 = 0; k2 < NE2; ++k2) {
1512 for (index_t k0 = 0; k0 < NE0; ++k0) {
1513 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1514 *o++ = 0.;
1515 *o++ = -1.;
1516 *o = 0.;
1517 }
1518 }
1519 }
1520
1521 if (m_faceOffset[3] > -1) {
1522 #pragma omp for nowait
1523 for (index_t k2 = 0; k2 < NE2; ++k2) {
1524 for (index_t k0 = 0; k0 < NE0; ++k0) {
1525 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1526 *o++ = 0.;
1527 *o++ = 1.;
1528 *o = 0.;
1529 }
1530 }
1531 }
1532
1533 if (m_faceOffset[4] > -1) {
1534 #pragma omp for nowait
1535 for (index_t k1 = 0; k1 < NE1; ++k1) {
1536 for (index_t k0 = 0; k0 < NE0; ++k0) {
1537 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1538 *o++ = 0.;
1539 *o++ = 0.;
1540 *o = -1.;
1541 }
1542 }
1543 }
1544
1545 if (m_faceOffset[5] > -1) {
1546 #pragma omp for nowait
1547 for (index_t k1 = 0; k1 < NE1; ++k1) {
1548 for (index_t k0 = 0; k0 < NE0; ++k0) {
1549 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1550 *o++ = 0.;
1551 *o++ = 0.;
1552 *o = 1.;
1553 }
1554 }
1555 }
1556 } // end of parallel section
1557
1558 } else {
1559 std::stringstream msg;
1560 msg << "setToNormal: invalid function space type "
1561 << out.getFunctionSpace().getTypeCode();
1562 throw ValueError(msg.str());
1563 }
1564 }
1565
1566 void Brick::setToSize(escript::Data& out) const
1567 {
1568 if (out.getFunctionSpace().getTypeCode() == Elements
1569 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1570 out.requireWrite();
1571 const dim_t numQuad=out.getNumDataPointsPerSample();
1572 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1573 const dim_t NE = getNumElements();
1574 #pragma omp parallel for
1575 for (index_t k = 0; k < NE; ++k) {
1576 double* o = out.getSampleDataRW(k);
1577 fill(o, o+numQuad, size);
1578 }
1579 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1580 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1581 out.requireWrite();
1582 const dim_t numQuad=out.getNumDataPointsPerSample();
1583 const dim_t NE0 = m_NE[0];
1584 const dim_t NE1 = m_NE[1];
1585 const dim_t NE2 = m_NE[2];
1586 #pragma omp parallel
1587 {
1588 if (m_faceOffset[0] > -1) {
1589 const double size=min(m_dx[1],m_dx[2]);
1590 #pragma omp for nowait
1591 for (index_t k2 = 0; k2 < NE2; ++k2) {
1592 for (index_t k1 = 0; k1 < NE1; ++k1) {
1593 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1594 fill(o, o+numQuad, size);
1595 }
1596 }
1597 }
1598
1599 if (m_faceOffset[1] > -1) {
1600 const double size=min(m_dx[1],m_dx[2]);
1601 #pragma omp for nowait
1602 for (index_t k2 = 0; k2 < NE2; ++k2) {
1603 for (index_t k1 = 0; k1 < NE1; ++k1) {
1604 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1605 fill(o, o+numQuad, size);
1606 }
1607 }
1608 }
1609
1610 if (m_faceOffset[2] > -1) {
1611 const double size=min(m_dx[0],m_dx[2]);
1612 #pragma omp for nowait
1613 for (index_t k2 = 0; k2 < NE2; ++k2) {
1614 for (index_t k0 = 0; k0 < NE0; ++k0) {
1615 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1616 fill(o, o+numQuad, size);
1617 }
1618 }
1619 }
1620
1621 if (m_faceOffset[3] > -1) {
1622 const double size=min(m_dx[0],m_dx[2]);
1623 #pragma omp for nowait
1624 for (index_t k2 = 0; k2 < NE2; ++k2) {
1625 for (index_t k0 = 0; k0 < NE0; ++k0) {
1626 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1627 fill(o, o+numQuad, size);
1628 }
1629 }
1630 }
1631
1632 if (m_faceOffset[4] > -1) {
1633 const double size=min(m_dx[0],m_dx[1]);
1634 #pragma omp for nowait
1635 for (index_t k1 = 0; k1 < NE1; ++k1) {
1636 for (index_t k0 = 0; k0 < NE0; ++k0) {
1637 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1638 fill(o, o+numQuad, size);
1639 }
1640 }
1641 }
1642
1643 if (m_faceOffset[5] > -1) {
1644 const double size=min(m_dx[0],m_dx[1]);
1645 #pragma omp for nowait
1646 for (index_t k1 = 0; k1 < NE1; ++k1) {
1647 for (index_t k0 = 0; k0 < NE0; ++k0) {
1648 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1649 fill(o, o+numQuad, size);
1650 }
1651 }
1652 }
1653 } // end of parallel section
1654
1655 } else {
1656 std::stringstream msg;
1657 msg << "setToSize: invalid function space type "
1658 << out.getFunctionSpace().getTypeCode();
1659 throw ValueError(msg.str());
1660 }
1661 }
1662
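// Illustrative note on setToSize() above: interior elements get the cell
// diagonal, face elements the smaller of the two in-plane spacings. For
// example, with m_dx = (1, 2, 2) every interior element is assigned
// sqrt(1*1 + 2*2 + 2*2) = 3, while the x-normal faces (m_faceOffset[0] and
// [1]) are assigned min(m_dx[1], m_dx[2]) = 2.
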
1663 void Brick::Print_Mesh_Info(const bool full) const
1664 {
1665 RipleyDomain::Print_Mesh_Info(full);
1666 if (full) {
1667 std::cout << " Id Coordinates" << std::endl;
1668 std::cout.precision(15);
1669 std::cout.setf(ios::scientific, std::ios::floatfield);
1670 for (index_t i=0; i < getNumNodes(); i++) {
1671 std::cout << " " << std::setw(5) << m_nodeId[i]
1672 << " " << getLocalCoordinate(i%m_NN[0], 0)
1673 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1674 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << std::endl;
1675 }
1676 }
1677 }
1678
1679
1680 //protected
1681 void Brick::assembleCoordinates(escript::Data& arg) const
1682 {
1683 int numDim = m_numDim;
1684 if (!arg.isDataPointShapeEqual(1, &numDim))
1685 throw ValueError("setToX: Invalid Data object shape");
1686 if (!arg.numSamplesEqual(1, getNumNodes()))
1687 throw ValueError("setToX: Illegal number of samples in Data object");
1688
1689 const dim_t NN0 = m_NN[0];
1690 const dim_t NN1 = m_NN[1];
1691 const dim_t NN2 = m_NN[2];
1692 arg.requireWrite();
1693 #pragma omp parallel for
1694 for (dim_t i2 = 0; i2 < NN2; i2++) {
1695 for (dim_t i1 = 0; i1 < NN1; i1++) {
1696 for (dim_t i0 = 0; i0 < NN0; i0++) {
1697 double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1698 point[0] = getLocalCoordinate(i0, 0);
1699 point[1] = getLocalCoordinate(i1, 1);
1700 point[2] = getLocalCoordinate(i2, 2);
1701 }
1702 }
1703 }
1704 }
1705
1706 //protected
1707 void Brick::assembleGradient(escript::Data& out,
1708 const escript::Data& in) const
1709 {
1710 if (out.isComplex() != in.isComplex())
1711 throw ValueError("Gradient: input & output complexity must match.");
1712 else if (in.isComplex())
1713 assembleGradientImpl<cplx_t>(out, in);
1714 else
1715 assembleGradientImpl<real_t>(out, in);
1716 }
1717
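// Illustrative sketch: the constants C0..C6 used in assembleGradientImpl()
// below appear to be products of the 1D shape-function values at the two
// Gauss-Legendre points p-, p+ = (1 -/+ 1/sqrt(3))/2 of the unit interval:
// C2 = p-, C6 = p+, C0 = p-*p-, C5 = p+*p+, C1 = p-*p+ = 1/6, while C4 = 1/2
// and C3 = 1/4 are the midpoint values. A small check of that reading
// (gaussGradientConstants is a hypothetical name, not escript API):
inline void gaussGradientConstants(double& c0, double& c1, double& c2,
                                   double& c5, double& c6)
{
    const double pm = (1.0 - 1.0/sqrt(3.0))/2.0;  // ~0.21132486540518711775
    const double pp = (1.0 + 1.0/sqrt(3.0))/2.0;  // ~0.78867513459481288225
    c2 = pm;
    c6 = pp;
    c0 = pm*pm;   // ~0.044658198738520451079
    c5 = pp*pp;   // ~0.62200846792814621559
    c1 = pm*pp;   // = 1/6 = 0.16666666666666666667
}
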
1718 //protected
1719 template<typename Scalar>
1720 void Brick::assembleGradientImpl(escript::Data& out,
1721 const escript::Data& in) const
1722 {
1723 const dim_t numComp = in.getDataPointSize();
1724 const double C0 = .044658198738520451079;
1725 const double C1 = .16666666666666666667;
1726 const double C2 = .21132486540518711775;
1727 const double C3 = .25;
1728 const double C4 = .5;
1729 const double C5 = .62200846792814621559;
1730 const double C6 = .78867513459481288225;
1731 const dim_t NE0 = m_NE[0];
1732 const dim_t NE1 = m_NE[1];
1733 const dim_t NE2 = m_NE[2];
1734 const Scalar zero = static_cast<Scalar>(0);
1735
1736 if (out.getFunctionSpace().getTypeCode() == Elements) {
1737 out.requireWrite();
1738 #pragma omp parallel
1739 {
1740 vector<Scalar> f_000(numComp, zero);
1741 vector<Scalar> f_001(numComp, zero);
1742 vector<Scalar> f_010(numComp, zero);
1743 vector<Scalar> f_011(numComp, zero);
1744 vector<Scalar> f_100(numComp, zero);
1745 vector<Scalar> f_101(numComp, zero);
1746 vector<Scalar> f_110(numComp, zero);
1747 vector<Scalar> f_111(numComp, zero);
1748 #pragma omp for
1749 for (index_t k2=0; k2 < NE2; ++k2) {
1750 for (index_t k1=0; k1 < NE1; ++k1) {
1751 for (index_t k0=0; k0 < NE0; ++k0) {
1752 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1753 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1754 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1755 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1756 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1757 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1758 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1759 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1760 Scalar* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1), zero);
1761 for (index_t i=0; i < numComp; ++i) {
1762 const Scalar V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1763 const Scalar V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1764 const Scalar V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1765 const Scalar V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1766 const Scalar V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1767 const Scalar V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1768 const Scalar V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1769 const Scalar V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1770 const Scalar V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1771 const Scalar V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1772 const Scalar V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1773 const Scalar V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1774 o[INDEX3(i,0,0,numComp,3)] = V0;
1775 o[INDEX3(i,1,0,numComp,3)] = V4;
1776 o[INDEX3(i,2,0,numComp,3)] = V8;
1777 o[INDEX3(i,0,1,numComp,3)] = V0;
1778 o[INDEX3(i,1,1,numComp,3)] = V5;
1779 o[INDEX3(i,2,1,numComp,3)] = V9;
1780 o[INDEX3(i,0,2,numComp,3)] = V1;
1781 o[INDEX3(i,1,2,numComp,3)] = V4;
1782 o[INDEX3(i,2,2,numComp,3)] = V10;
1783 o[INDEX3(i,0,3,numComp,3)] = V1;
1784 o[INDEX3(i,1,3,numComp,3)] = V5;
1785 o[INDEX3(i,2,3,numComp,3)] = V11;
1786 o[INDEX3(i,0,4,numComp,3)] = V2;
1787 o[INDEX3(i,1,4,numComp,3)] = V6;
1788 o[INDEX3(i,2,4,numComp,3)] = V8;
1789 o[INDEX3(i,0,5,numComp,3)] = V2;
1790 o[INDEX3(i,1,5,numComp,3)] = V7;
1791 o[INDEX3(i,2,5,numComp,3)] = V9;
1792 o[INDEX3(i,0,6,numComp,3)] = V3;
1793 o[INDEX3(i,1,6,numComp,3)] = V6;
1794 o[INDEX3(i,2,6,numComp,3)] = V10;
1795 o[INDEX3(i,0,7,numComp,3)] = V3;
1796 o[INDEX3(i,1,7,numComp,3)] = V7;
1797 o[INDEX3(i,2,7,numComp,3)] = V11;
1798 } // end of component loop i
1799 } // end of k0 loop
1800 } // end of k1 loop
1801 } // end of k2 loop
1802 } // end of parallel section
1803 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
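// ReducedElements uses a single quadrature point per element: each gradient
// component is the mean of the four nodal differences across the element in
// that direction (hence the factor C3 = 1/4).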
1804 out.requireWrite();
1805 #pragma omp parallel
1806 {
1807 vector<Scalar> f_000(numComp, zero);
1808 vector<Scalar> f_001(numComp, zero);
1809 vector<Scalar> f_010(numComp, zero);
1810 vector<Scalar> f_011(numComp, zero);
1811 vector<Scalar> f_100(numComp, zero);
1812 vector<Scalar> f_101(numComp, zero);
1813 vector<Scalar> f_110(numComp, zero);
1814 vector<Scalar> f_111(numComp, zero);
1815 #pragma omp for
1816 for (index_t k2=0; k2 < NE2; ++k2) {
1817 for (index_t k1=0; k1 < NE1; ++k1) {
1818 for (index_t k0=0; k0 < NE0; ++k0) {
1819 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1820 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1821 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1822 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1823 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1824 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1825 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1826 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1827 Scalar* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1), zero);
1828 for (index_t i=0; i < numComp; ++i) {
1829 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1830 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1831 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1832 } // end of component loop i
1833 } // end of k0 loop
1834 } // end of k1 loop
1835 } // end of k2 loop
1836 } // end of parallel section
1837 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
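// FaceElements: the m_faceOffset[f] > -1 checks skip faces with no face elements on
// this rank. On each owned face the two tangential derivatives are taken at the 2x2
// Gauss points of the face (weights C2/C6), while the derivative normal to the face
// is formed from the first two node layers next to the boundary using the
// full-element weights C0/C1/C5.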
1838 out.requireWrite();
1839 #pragma omp parallel
1840 {
1841 vector<Scalar> f_000(numComp, zero);
1842 vector<Scalar> f_001(numComp, zero);
1843 vector<Scalar> f_010(numComp, zero);
1844 vector<Scalar> f_011(numComp, zero);
1845 vector<Scalar> f_100(numComp, zero);
1846 vector<Scalar> f_101(numComp, zero);
1847 vector<Scalar> f_110(numComp, zero);
1848 vector<Scalar> f_111(numComp, zero);
1849 if (m_faceOffset[0] > -1) {
1850 #pragma omp for nowait
1851 for (index_t k2=0; k2 < NE2; ++k2) {
1852 for (index_t k1=0; k1 < NE1; ++k1) {
1853 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1854 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1855 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1856 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1857 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1858 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1859 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1860 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1861 Scalar* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1), zero);
1862 for (index_t i=0; i < numComp; ++i) {
1863 const Scalar V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1864 const Scalar V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1865 const Scalar V2=((f_001[i]-f_000[i])*C6 + (f_011[i]-f_010[i])*C2) / m_dx[2];
1866 const Scalar V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1867 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1868 o[INDEX3(i,1,0,numComp,3)] = V0;
1869 o[INDEX3(i,2,0,numComp,3)] = V2;
1870 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1871 o[INDEX3(i,1,1,numComp,3)] = V0;
1872 o[INDEX3(i,2,1,numComp,3)] = V3;
1873 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1874 o[INDEX3(i,1,2,numComp,3)] = V1;
1875 o[INDEX3(i,2,2,numComp,3)] = V2;
1876 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1877 o[INDEX3(i,1,3,numComp,3)] = V1;
1878 o[INDEX3(i,2,3,numComp,3)] = V3;
1879 } // end of component loop i
1880 } // end of k1 loop
1881 } // end of k2 loop
1882 } // end of face 0
1883 if (m_faceOffset[1] > -1) {
1884 #pragma omp for nowait
1885 for (index_t k2=0; k2 < NE2; ++k2) {
1886 for (index_t k1=0; k1 < NE1; ++k1) {
1887 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1888 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1889 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1890 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1891 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1892 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1893 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1894 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1895 Scalar* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1), zero);
1896 for (index_t i=0; i < numComp; ++i) {
1897 const Scalar V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1898 const Scalar V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1899 const Scalar V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1900 const Scalar V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1901 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1902 o[INDEX3(i,1,0,numComp,3)] = V0;
1903 o[INDEX3(i,2,0,numComp,3)] = V2;
1904 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1905 o[INDEX3(i,1,1,numComp,3)] = V0;
1906 o[INDEX3(i,2,1,numComp,3)] = V3;
1907 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1908 o[INDEX3(i,1,2,numComp,3)] = V1;
1909 o[INDEX3(i,2,2,numComp,3)] = V2;
1910 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1911 o[INDEX3(i,1,3,numComp,3)] = V1;
1912 o[INDEX3(i,2,3,numComp,3)] = V3;
1913 } // end of component loop i
1914 } // end of k1 loop
1915 } // end of k2 loop
1916 } // end of face 1
1917 if (m_faceOffset[2] > -1) {
1918 #pragma omp for nowait
1919 for (index_t k2=0; k2 < NE2; ++k2) {
1920 for (index_t k0=0; k0 < NE0; ++k0) {
1921 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1922 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1923 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1924 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1925 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1926 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1927 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1928 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1929 Scalar* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0), zero);
1930 for (index_t i=0; i < numComp; ++i) {
1931 const Scalar V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1932 const Scalar V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1933 const Scalar V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1934 o[INDEX3(i,0,0,numComp,3)] = V0;
1935 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1936 o[INDEX3(i,2,0,numComp,3)] = V1;
1937 o[INDEX3(i,0,1,numComp,3)] = V0;
1938 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1939 o[INDEX3(i,2,1,numComp,3)] = V2;
1940 o[INDEX3(i,0,2,numComp,3)] = V0;
1941 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1942 o[INDEX3(i,2,2,numComp,3)] = V1;
1943 o[INDEX3(i,0,3,numComp,3)] = V0;
1944 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1945 o[INDEX3(i,2,3,numComp,3)] = V2;
1946 } // end of component loop i
1947 } // end of k0 loop
1948 } // end of k2 loop
1949 } // end of face 2
1950 if (m_faceOffset[3] > -1) {
1951 #pragma omp for nowait
1952 for (index_t k2=0; k2 < NE2; ++k2) {
1953 for (index_t k0=0; k0 < NE0; ++k0) {
1954 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1955 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1956 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1957 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1958 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1959 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1960 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1961 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1962 Scalar* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0), zero);
1963 for (index_t i=0; i < numComp; ++i) {
1964 const Scalar V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1965 const Scalar V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1966 const Scalar V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1967 const Scalar V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1968 o[INDEX3(i,0,0,numComp,3)] = V0;
1969 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1970 o[INDEX3(i,2,0,numComp,3)] = V2;
1971 o[INDEX3(i,0,1,numComp,3)] = V0;
1972 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1973 o[INDEX3(i,2,1,numComp,3)] = V3;
1974 o[INDEX3(i,0,2,numComp,3)] = V1;
1975 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1976 o[INDEX3(i,2,2,numComp,3)] = V2;
1977 o[INDEX3(i,0,3,numComp,3)] = V1;
1978 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1979 o[INDEX3(i,2,3,numComp,3)] = V3;
1980 } // end of component loop i
1981 } // end of k0 loop
1982 } // end of k2 loop
1983 } // end of face 3
1984 if (m_faceOffset[4] > -1) {
1985 #pragma omp for nowait
1986 for (index_t k1=0; k1 < NE1; ++k1) {
1987 for (index_t k0=0; k0 < NE0; ++k0) {
1988 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1989 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1990 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1991 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1992 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1993 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1994 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1995 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1996 Scalar* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0), zero);
1997 for (index_t i=0; i < numComp; ++i) {
1998 const Scalar V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1999 const Scalar V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
2000 const Scalar V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
2001 const Scalar V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
2002 o[INDEX3(i,0,0,numComp,3)] = V0;
2003 o[INDEX3(i,1,0,numComp,3)] = V2;
2004 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
2005 o[INDEX3(i,0,1,numComp,3)] = V0;
2006 o[INDEX3(i,1,1,numComp,3)] = V3;
2007 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
2008 o[INDEX3(i,0,2,numComp,3)] = V1;
2009 o[INDEX3(i,1,2,numComp,3)] = V2;
2010 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
2011 o[INDEX3(i,0,3,numComp,3)] = V1;
2012 o[INDEX3(i,1,3,numComp,3)] = V3;
2013 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
2014 } // end of component loop i
2015 } // end of k0 loop
2016 } // end of k1 loop
2017 } // end of face 4
2018 if (m_faceOffset[5] > -1) {
2019 #pragma omp for nowait
2020 for (index_t k1=0; k1 < NE1; ++k1) {
2021 for (index_t k0=0; k0 < NE0; ++k0) {
2022 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2023 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2024 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2025 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2026 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2027 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2028 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2029 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
2030 Scalar* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0), zero);
2031 for (index_t i=0; i < numComp; ++i) {
2032 const Scalar V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
2033 const Scalar V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
2034 const Scalar V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
2035 const Scalar V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
2036 o[INDEX3(i,0,0,numComp,3)] = V0;
2037 o[INDEX3(i,1,0,numComp,3)] = V2;
2038 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
2039 o[INDEX3(i,0,1,numComp,3)] = V0;
2040 o[INDEX3(i,1,1,numComp,3)] = V3;
2041 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])