/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4988 - (show annotations)
Wed Jun 4 01:15:10 2014 UTC (4 years, 9 months ago) by caltinay
File size: 165016 byte(s)
merge to current trunk

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <limits>
18
19 #include <ripley/Brick.h>
20 #include <esysUtils/esysFileWriter.h>
21 #include <ripley/DefaultAssembler3D.h>
22 #include <ripley/WaveAssembler3D.h>
23 #include <ripley/LameAssembler3D.h>
24 #include <ripley/domainhelpers.h>
25 #include <boost/scoped_array.hpp>
26
27 #ifdef USE_NETCDF
28 #include <netcdfcpp.h>
29 #endif
30
31 #if USE_SILO
32 #include <silo.h>
33 #ifdef ESYS_MPI
34 #include <pmpio.h>
35 #endif
36 #endif
37
38 #include <iomanip>
39
40 #include "esysUtils/EsysRandom.h"
41 #include "blocktools.h"
42
43
44 using namespace std;
45 using esysUtils::FileWriter;
46
47 namespace ripley {
48
49 int indexOfMax(int a, int b, int c) {
50 if (a > b) {
51 if (c > a) {
52 return 2;
53 }
54 return 0;
55 } else if (b > c) {
56 return 1;
57 }
58 return 2;
59 }
60
61 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
62 double x1, double y1, double z1, int d0, int d1, int d2,
63 const std::vector<double>& points, const std::vector<int>& tags,
64 const simap_t& tagnamestonums,
65 escript::SubWorld_ptr w) :
66 RipleyDomain(3, w)
67 {
68 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
69 * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
70 throw RipleyException("The number of elements has overflowed, this "
71 "limit may be raised in future releases.");
72
73 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
74 throw RipleyException("Number of elements in each spatial dimension "
75 "must be positive");
76
77 // ignore subdivision parameters for serial run
78 if (m_mpiInfo->size == 1) {
79 d0=1;
80 d1=1;
81 d2=1;
82 }
83 bool warn=false;
84
85 std::vector<int> factors;
86 int ranks = m_mpiInfo->size;
87 int epr[3] = {n0,n1,n2};
88 int d[3] = {d0,d1,d2};
89 if (d0<=0 || d1<=0 || d2<=0) {
90 for (int i = 0; i < 3; i++) {
91 if (d[i] < 1) {
92 d[i] = 1;
93 continue;
94 }
95 epr[i] = -1; // can no longer be max
96 if (ranks % d[i] != 0) {
97 throw RipleyException("Invalid number of spatial subdivisions");
98 }
99 //remove
100 ranks /= d[i];
101 }
102 factorise(factors, ranks);
103 if (factors.size() != 0) {
104 warn = true;
105 }
106 }
107 while (factors.size() > 0) {
108 int i = indexOfMax(epr[0],epr[1],epr[2]);
109 int f = factors.back();
110 factors.pop_back();
111 d[i] *= f;
112 epr[i] /= f;
113 }
114 d0 = d[0]; d1 = d[1]; d2 = d[2];
115
116 // ensure number of subdivisions is valid and nodes can be distributed
117 // among number of ranks
118 if (d0*d1*d2 != m_mpiInfo->size){
119 throw RipleyException("Invalid number of spatial subdivisions");
120 }
121 if (warn) {
122 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
123 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
124 }
125
126 double l0 = x1-x0;
127 double l1 = y1-y0;
128 double l2 = z1-z0;
129 m_dx[0] = l0/n0;
130 m_dx[1] = l1/n1;
131 m_dx[2] = l2/n2;
132
133 if ((n0+1)%d0 > 0) {
134 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
135 l0=m_dx[0]*n0;
136 cout << "Warning: Adjusted number of elements and length. N0="
137 << n0 << ", l0=" << l0 << endl;
138 }
139 if ((n1+1)%d1 > 0) {
140 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
141 l1=m_dx[1]*n1;
142 cout << "Warning: Adjusted number of elements and length. N1="
143 << n1 << ", l1=" << l1 << endl;
144 }
145 if ((n2+1)%d2 > 0) {
146 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
147 l2=m_dx[2]*n2;
148 cout << "Warning: Adjusted number of elements and length. N2="
149 << n2 << ", l2=" << l2 << endl;
150 }
151
152 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
153 throw RipleyException("Too few elements for the number of ranks");
154
155 m_gNE[0] = n0;
156 m_gNE[1] = n1;
157 m_gNE[2] = n2;
158 m_origin[0] = x0;
159 m_origin[1] = y0;
160 m_origin[2] = z0;
161 m_length[0] = l0;
162 m_length[1] = l1;
163 m_length[2] = l2;
164 m_NX[0] = d0;
165 m_NX[1] = d1;
166 m_NX[2] = d2;
167
168 // local number of elements (including overlap)
169 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
170 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
171 m_NE[0]++;
172 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
173 m_ownNE[0]--;
174
175 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
176 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
177 m_NE[1]++;
178 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
179 m_ownNE[1]--;
180
181 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
182 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
183 m_NE[2]++;
184 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
185 m_ownNE[2]--;
186
187 // local number of nodes
188 m_NN[0] = m_NE[0]+1;
189 m_NN[1] = m_NE[1]+1;
190 m_NN[2] = m_NE[2]+1;
191
192 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
193 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
194 if (m_offset[0] > 0)
195 m_offset[0]--;
196 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
197 if (m_offset[1] > 0)
198 m_offset[1]--;
199 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
200 if (m_offset[2] > 0)
201 m_offset[2]--;
202
203 populateSampleIds();
204 createPattern();
205
206 for (map<string, int>::const_iterator i = tagnamestonums.begin();
207 i != tagnamestonums.end(); i++) {
208 setTagMap(i->first, i->second);
209 }
210 addPoints(tags.size(), &points[0], &tags[0]);
211 }
212
213
214 Brick::~Brick()
215 {
216 }
217
218 string Brick::getDescription() const
219 {
220 return "ripley::Brick";
221 }
222
223 bool Brick::operator==(const AbstractDomain& other) const
224 {
225 const Brick* o=dynamic_cast<const Brick*>(&other);
226 if (o) {
227 return (RipleyDomain::operator==(other) &&
228 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
229 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
230 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
231 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
232 }
233
234 return false;
235 }
236
237 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
238 const ReaderParameters& params) const
239 {
240 #ifdef USE_NETCDF
241 // check destination function space
242 int myN0, myN1, myN2;
243 if (out.getFunctionSpace().getTypeCode() == Nodes) {
244 myN0 = m_NN[0];
245 myN1 = m_NN[1];
246 myN2 = m_NN[2];
247 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
248 out.getFunctionSpace().getTypeCode() == ReducedElements) {
249 myN0 = m_NE[0];
250 myN1 = m_NE[1];
251 myN2 = m_NE[2];
252 } else
253 throw RipleyException("readNcGrid(): invalid function space for output data object");
254
255 if (params.first.size() != 3)
256 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
257
258 if (params.numValues.size() != 3)
259 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
260
261 if (params.multiplier.size() != 3)
262 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
263 for (size_t i=0; i<params.multiplier.size(); i++)
264 if (params.multiplier[i]<1)
265 throw RipleyException("readNcGrid(): all multipliers must be positive");
266
267 // check file existence and size
268 NcFile f(filename.c_str(), NcFile::ReadOnly);
269 if (!f.is_valid())
270 throw RipleyException("readNcGrid(): cannot open file");
271
272 NcVar* var = f.get_var(varname.c_str());
273 if (!var)
274 throw RipleyException("readNcGrid(): invalid variable name");
275
276 // TODO: rank>0 data support
277 const int numComp = out.getDataPointSize();
278 if (numComp > 1)
279 throw RipleyException("readNcGrid(): only scalar data supported");
280
281 const int dims = var->num_dims();
282 boost::scoped_array<long> edges(var->edges());
283
284 // is this a slice of the data object (dims!=3)?
285 // note the expected ordering of edges (as in numpy: z,y,x)
286 if ( (dims==3 && (params.numValues[2] > edges[0] ||
287 params.numValues[1] > edges[1] ||
288 params.numValues[0] > edges[2]))
289 || (dims==2 && params.numValues[2]>1)
290 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
291 throw RipleyException("readNcGrid(): not enough data in file");
292 }
293
294 // check if this rank contributes anything
295 if (params.first[0] >= m_offset[0]+myN0 ||
296 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
297 params.first[1] >= m_offset[1]+myN1 ||
298 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
299 params.first[2] >= m_offset[2]+myN2 ||
300 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
301 return;
302 }
303
304 // now determine how much this rank has to write
305
306 // first coordinates in data object to write to
307 const int first0 = max(0, params.first[0]-m_offset[0]);
308 const int first1 = max(0, params.first[1]-m_offset[1]);
309 const int first2 = max(0, params.first[2]-m_offset[2]);
310 // indices to first value in file (not accounting for reverse yet)
311 int idx0 = max(0, m_offset[0]-params.first[0]);
312 int idx1 = max(0, m_offset[1]-params.first[1]);
313 int idx2 = max(0, m_offset[2]-params.first[2]);
314 // number of values to read
315 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
316 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
317 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
318
319 // make sure we read the right block if going backwards through file
320 if (params.reverse[0])
321 idx0 = edges[dims-1]-num0-idx0;
322 if (dims>1 && params.reverse[1])
323 idx1 = edges[dims-2]-num1-idx1;
324 if (dims>2 && params.reverse[2])
325 idx2 = edges[dims-3]-num2-idx2;
326
327
328 vector<double> values(num0*num1*num2);
329 if (dims==3) {
330 var->set_cur(idx2, idx1, idx0);
331 var->get(&values[0], num2, num1, num0);
332 } else if (dims==2) {
333 var->set_cur(idx1, idx0);
334 var->get(&values[0], num1, num0);
335 } else {
336 var->set_cur(idx0);
337 var->get(&values[0], num0);
338 }
339
340 const int dpp = out.getNumDataPointsPerSample();
341 out.requireWrite();
342
343 // helpers for reversing
344 const int x0 = (params.reverse[0] ? num0-1 : 0);
345 const int x_mult = (params.reverse[0] ? -1 : 1);
346 const int y0 = (params.reverse[1] ? num1-1 : 0);
347 const int y_mult = (params.reverse[1] ? -1 : 1);
348 const int z0 = (params.reverse[2] ? num2-1 : 0);
349 const int z_mult = (params.reverse[2] ? -1 : 1);
350
351 for (index_t z=0; z<num2; z++) {
352 for (index_t y=0; y<num1; y++) {
353 #pragma omp parallel for
354 for (index_t x=0; x<num0; x++) {
355 const int baseIndex = first0+x*params.multiplier[0]
356 +(first1+y*params.multiplier[1])*myN0
357 +(first2+z*params.multiplier[2])*myN0*myN1;
358 const int srcIndex=(z0+z_mult*z)*num1*num0
359 +(y0+y_mult*y)*num0
360 +(x0+x_mult*x);
361 if (!isnan(values[srcIndex])) {
362 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
363 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
364 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
365 const int dataIndex = baseIndex+m0
366 +m1*myN0
367 +m2*myN0*myN1;
368 double* dest = out.getSampleDataRW(dataIndex);
369 for (index_t q=0; q<dpp; q++) {
370 *dest++ = values[srcIndex];
371 }
372 }
373 }
374 }
375 }
376 }
377 }
378 }
379 #else
380 throw RipleyException("readNcGrid(): not compiled with netCDF support");
381 #endif
382 }
383
384 #ifdef USE_BOOSTIO
385 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
386 const ReaderParameters& params) const
387 {
388 // the mapping is not universally correct but should work on our
389 // supported platforms
390 switch (params.dataType) {
391 case DATATYPE_INT32:
392 readBinaryGridZippedImpl<int>(out, filename, params);
393 break;
394 case DATATYPE_FLOAT32:
395 readBinaryGridZippedImpl<float>(out, filename, params);
396 break;
397 case DATATYPE_FLOAT64:
398 readBinaryGridZippedImpl<double>(out, filename, params);
399 break;
400 default:
401 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
402 }
403 }
404 #endif
405
406 void Brick::readBinaryGrid(escript::Data& out, string filename,
407 const ReaderParameters& params) const
408 {
409 // the mapping is not universally correct but should work on our
410 // supported platforms
411 switch (params.dataType) {
412 case DATATYPE_INT32:
413 readBinaryGridImpl<int>(out, filename, params);
414 break;
415 case DATATYPE_FLOAT32:
416 readBinaryGridImpl<float>(out, filename, params);
417 break;
418 case DATATYPE_FLOAT64:
419 readBinaryGridImpl<double>(out, filename, params);
420 break;
421 default:
422 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
423 }
424 }
425
426 template<typename ValueType>
427 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
428 const ReaderParameters& params) const
429 {
430 // check destination function space
431 int myN0, myN1, myN2;
432 if (out.getFunctionSpace().getTypeCode() == Nodes) {
433 myN0 = m_NN[0];
434 myN1 = m_NN[1];
435 myN2 = m_NN[2];
436 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
437 out.getFunctionSpace().getTypeCode() == ReducedElements) {
438 myN0 = m_NE[0];
439 myN1 = m_NE[1];
440 myN2 = m_NE[2];
441 } else
442 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
443
444 if (params.first.size() != 3)
445 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
446
447 if (params.numValues.size() != 3)
448 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
449
450 if (params.multiplier.size() != 3)
451 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
452 for (size_t i=0; i<params.multiplier.size(); i++)
453 if (params.multiplier[i]<1)
454 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
455 if (params.reverse[0] != 0 || params.reverse[1] != 0)
456 throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
457
458 // check file existence and size
459 ifstream f(filename.c_str(), ifstream::binary);
460 if (f.fail()) {
461 throw RipleyException("readBinaryGrid(): cannot open file");
462 }
463 f.seekg(0, ios::end);
464 const int numComp = out.getDataPointSize();
465 const int filesize = f.tellg();
466 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 if (filesize < reqsize) {
468 f.close();
469 throw RipleyException("readBinaryGrid(): not enough data in file");
470 }
471
472 // check if this rank contributes anything
473 if (params.first[0] >= m_offset[0]+myN0 ||
474 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475 params.first[1] >= m_offset[1]+myN1 ||
476 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477 params.first[2] >= m_offset[2]+myN2 ||
478 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 f.close();
480 return;
481 }
482
483 // now determine how much this rank has to write
484
485 // first coordinates in data object to write to
486 const int first0 = max(0, params.first[0]-m_offset[0]);
487 const int first1 = max(0, params.first[1]-m_offset[1]);
488 const int first2 = max(0, params.first[2]-m_offset[2]);
489 // indices to first value in file (not accounting for reverse yet)
490 int idx0 = max(0, m_offset[0]-params.first[0]);
491 int idx1 = max(0, m_offset[1]-params.first[1]);
492 int idx2 = max(0, m_offset[2]-params.first[2]);
493 // number of values to read
494 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
495 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
496 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
497
498 // make sure we read the right block if going backwards through file
499 if (params.reverse[2])
500 idx2 = params.numValues[2]-idx2-1;
501
502 // helpers for reversing
503 const int z_mult = (params.reverse[2] ? -1 : 1);
504
505 out.requireWrite();
506 vector<ValueType> values(num0*numComp);
507 const int dpp = out.getNumDataPointsPerSample();
508
509 for (int z=0; z<num2; z++) {
510 for (int y=0; y<num1; y++) {
511 const int fileofs = numComp*(idx0 +
512 (idx1+y)*params.numValues[0] +
513 (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
514 f.seekg(fileofs*sizeof(ValueType));
515 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
516
517 for (int x=0; x<num0; x++) {
518 const int baseIndex = first0+x*params.multiplier[0]
519 +(first1+y*params.multiplier[1])*myN0
520 +(first2+z*params.multiplier[2])*myN0*myN1;
521 for (int m2=0; m2<params.multiplier[2]; m2++) {
522 for (int m1=0; m1<params.multiplier[1]; m1++) {
523 for (int m0=0; m0<params.multiplier[0]; m0++) {
524 const int dataIndex = baseIndex+m0
525 +m1*myN0
526 +m2*myN0*myN1;
527 double* dest = out.getSampleDataRW(dataIndex);
528 for (int c=0; c<numComp; c++) {
529 ValueType val = values[x*numComp+c];
530
531 if (params.byteOrder != BYTEORDER_NATIVE) {
532 char* cval = reinterpret_cast<char*>(&val);
533 // this will alter val!!
534 byte_swap32(cval);
535 }
536 if (!std::isnan(val)) {
537 for (int q=0; q<dpp; q++) {
538 *dest++ = static_cast<double>(val);
539 }
540 }
541 }
542 }
543 }
544 }
545 }
546 }
547 }
548
549 f.close();
550 }
551
552 #ifdef USE_BOOSTIO
553 template<typename ValueType>
554 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
555 const ReaderParameters& params) const
556 {
557 // check destination function space
558 int myN0, myN1, myN2;
559 if (out.getFunctionSpace().getTypeCode() == Nodes) {
560 myN0 = m_NN[0];
561 myN1 = m_NN[1];
562 myN2 = m_NN[2];
563 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
564 out.getFunctionSpace().getTypeCode() == ReducedElements) {
565 myN0 = m_NE[0];
566 myN1 = m_NE[1];
567 myN2 = m_NE[2];
568 } else
569 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
570
571 if (params.first.size() != 3)
572 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
573
574 if (params.numValues.size() != 3)
575 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
576
577 if (params.multiplier.size() != 3)
578 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
579 for (size_t i=0; i<params.multiplier.size(); i++)
580 if (params.multiplier[i]<1)
581 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
582
583 // check file existence and size
584 ifstream f(filename.c_str(), ifstream::binary);
585 if (f.fail()) {
586 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
587 }
588 f.seekg(0, ios::end);
589 const int numComp = out.getDataPointSize();
590 int filesize = f.tellg();
591 f.seekg(0, ios::beg);
592 std::vector<char> compressed(filesize);
593 f.read((char*)&compressed[0], filesize);
594 f.close();
595 std::vector<char> decompressed = unzip(compressed);
596 filesize = decompressed.size();
597 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
598 if (filesize < reqsize) {
599 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
600 }
601
602 // check if this rank contributes anything
603 if (params.first[0] >= m_offset[0]+myN0 ||
604 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
605 params.first[1] >= m_offset[1]+myN1 ||
606 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
607 params.first[2] >= m_offset[2]+myN2 ||
608 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
609 return;
610 }
611
612 // now determine how much this rank has to write
613
614 // first coordinates in data object to write to
615 const int first0 = max(0, params.first[0]-m_offset[0]);
616 const int first1 = max(0, params.first[1]-m_offset[1]);
617 const int first2 = max(0, params.first[2]-m_offset[2]);
618 // indices to first value in file
619 const int idx0 = max(0, m_offset[0]-params.first[0]);
620 const int idx1 = max(0, m_offset[1]-params.first[1]);
621 const int idx2 = max(0, m_offset[2]-params.first[2]);
622 // number of values to read
623 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
624 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
625 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
626
627 out.requireWrite();
628 vector<ValueType> values(num0*numComp);
629 const int dpp = out.getNumDataPointsPerSample();
630
631 for (int z=0; z<num2; z++) {
632 for (int y=0; y<num1; y++) {
633 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
634 +(idx2+z)*params.numValues[0]*params.numValues[1]);
635 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
636
637 for (int x=0; x<num0; x++) {
638 const int baseIndex = first0+x*params.multiplier[0]
639 +(first1+y*params.multiplier[1])*myN0
640 +(first2+z*params.multiplier[2])*myN0*myN1;
641 for (int m2=0; m2<params.multiplier[2]; m2++) {
642 for (int m1=0; m1<params.multiplier[1]; m1++) {
643 for (int m0=0; m0<params.multiplier[0]; m0++) {
644 const int dataIndex = baseIndex+m0
645 +m1*myN0
646 +m2*myN0*myN1;
647 double* dest = out.getSampleDataRW(dataIndex);
648 for (int c=0; c<numComp; c++) {
649 ValueType val = values[x*numComp+c];
650
651 if (params.byteOrder != BYTEORDER_NATIVE) {
652 char* cval = reinterpret_cast<char*>(&val);
653 // this will alter val!!
654 byte_swap32(cval);
655 }
656 if (!std::isnan(val)) {
657 for (int q=0; q<dpp; q++) {
658 *dest++ = static_cast<double>(val);
659 }
660 }
661 }
662 }
663 }
664 }
665 }
666 }
667 }
668 }
669 #endif
670
671 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
672 int byteOrder, int dataType) const
673 {
674 // the mapping is not universally correct but should work on our
675 // supported platforms
676 switch (dataType) {
677 case DATATYPE_INT32:
678 writeBinaryGridImpl<int>(in, filename, byteOrder);
679 break;
680 case DATATYPE_FLOAT32:
681 writeBinaryGridImpl<float>(in, filename, byteOrder);
682 break;
683 case DATATYPE_FLOAT64:
684 writeBinaryGridImpl<double>(in, filename, byteOrder);
685 break;
686 default:
687 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
688 }
689 }
690
691 template<typename ValueType>
692 void Brick::writeBinaryGridImpl(const escript::Data& in,
693 const string& filename, int byteOrder) const
694 {
695 // check function space and determine number of points
696 int myN0, myN1, myN2;
697 int totalN0, totalN1, totalN2;
698 if (in.getFunctionSpace().getTypeCode() == Nodes) {
699 myN0 = m_NN[0];
700 myN1 = m_NN[1];
701 myN2 = m_NN[2];
702 totalN0 = m_gNE[0]+1;
703 totalN1 = m_gNE[1]+1;
704 totalN2 = m_gNE[2]+1;
705 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
706 in.getFunctionSpace().getTypeCode() == ReducedElements) {
707 myN0 = m_NE[0];
708 myN1 = m_NE[1];
709 myN2 = m_NE[2];
710 totalN0 = m_gNE[0];
711 totalN1 = m_gNE[1];
712 totalN2 = m_gNE[2];
713 } else
714 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
715
716 const int numComp = in.getDataPointSize();
717 const int dpp = in.getNumDataPointsPerSample();
718 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
719
720 if (numComp > 1 || dpp > 1)
721 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
722
723 // from here on we know that each sample consists of one value
724 FileWriter fw;
725 fw.openFile(filename, fileSize);
726 MPIBarrier();
727
728 for (index_t z=0; z<myN2; z++) {
729 for (index_t y=0; y<myN1; y++) {
730 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
731 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
732 ostringstream oss;
733
734 for (index_t x=0; x<myN0; x++) {
735 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
736 ValueType fvalue = static_cast<ValueType>(*sample);
737 if (byteOrder == BYTEORDER_NATIVE) {
738 oss.write((char*)&fvalue, sizeof(fvalue));
739 } else {
740 char* value = reinterpret_cast<char*>(&fvalue);
741 oss.write(byte_swap32(value), sizeof(fvalue));
742 }
743 }
744 fw.writeAt(oss, fileofs);
745 }
746 }
747 fw.close();
748 }
749
750 void Brick::dump(const string& fileName) const
751 {
752 #if USE_SILO
753 string fn(fileName);
754 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
755 fn+=".silo";
756 }
757
758 int driver=DB_HDF5;
759 string siloPath;
760 DBfile* dbfile = NULL;
761
762 #ifdef ESYS_MPI
763 PMPIO_baton_t* baton = NULL;
764 const int NUM_SILO_FILES = 1;
765 const char* blockDirFmt = "/block%04d";
766 #endif
767
768 if (m_mpiInfo->size > 1) {
769 #ifdef ESYS_MPI
770 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
771 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
772 PMPIO_DefaultClose, (void*)&driver);
773 // try the fallback driver in case of error
774 if (!baton && driver != DB_PDB) {
775 driver = DB_PDB;
776 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
777 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
778 PMPIO_DefaultClose, (void*)&driver);
779 }
780 if (baton) {
781 char str[64];
782 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
783 siloPath = str;
784 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
785 }
786 #endif
787 } else {
788 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
789 getDescription().c_str(), driver);
790 // try the fallback driver in case of error
791 if (!dbfile && driver != DB_PDB) {
792 driver = DB_PDB;
793 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
794 getDescription().c_str(), driver);
795 }
796 }
797
798 if (!dbfile)
799 throw RipleyException("dump: Could not create Silo file");
800
801 /*
802 if (driver==DB_HDF5) {
803 // gzip level 1 already provides good compression with minimal
804 // performance penalty. Some tests showed that gzip levels >3 performed
805 // rather badly on escript data both in terms of time and space
806 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
807 }
808 */
809
810 boost::scoped_ptr<double> x(new double[m_NN[0]]);
811 boost::scoped_ptr<double> y(new double[m_NN[1]]);
812 boost::scoped_ptr<double> z(new double[m_NN[2]]);
813 double* coords[3] = { x.get(), y.get(), z.get() };
814 #pragma omp parallel
815 {
816 #pragma omp for
817 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
818 coords[0][i0]=getLocalCoordinate(i0, 0);
819 }
820 #pragma omp for
821 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
822 coords[1][i1]=getLocalCoordinate(i1, 1);
823 }
824 #pragma omp for
825 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
826 coords[2][i2]=getLocalCoordinate(i2, 2);
827 }
828 }
829 int* dims = const_cast<int*>(getNumNodesPerDim());
830
831 // write mesh
832 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
833 DB_COLLINEAR, NULL);
834
835 // write node ids
836 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
837 NULL, 0, DB_INT, DB_NODECENT, NULL);
838
839 // write element ids
840 dims = const_cast<int*>(getNumElementsPerDim());
841 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
842 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
843
844 // rank 0 writes multimesh and multivar
845 if (m_mpiInfo->rank == 0) {
846 vector<string> tempstrings;
847 vector<char*> names;
848 for (dim_t i=0; i<m_mpiInfo->size; i++) {
849 stringstream path;
850 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
851 tempstrings.push_back(path.str());
852 names.push_back((char*)tempstrings.back().c_str());
853 }
854 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
855 DBSetDir(dbfile, "/");
856 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
857 &types[0], NULL);
858 tempstrings.clear();
859 names.clear();
860 for (dim_t i=0; i<m_mpiInfo->size; i++) {
861 stringstream path;
862 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
863 tempstrings.push_back(path.str());
864 names.push_back((char*)tempstrings.back().c_str());
865 }
866 types.assign(m_mpiInfo->size, DB_QUADVAR);
867 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
868 &types[0], NULL);
869 tempstrings.clear();
870 names.clear();
871 for (dim_t i=0; i<m_mpiInfo->size; i++) {
872 stringstream path;
873 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
874 tempstrings.push_back(path.str());
875 names.push_back((char*)tempstrings.back().c_str());
876 }
877 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
878 &types[0], NULL);
879 }
880
881 if (m_mpiInfo->size > 1) {
882 #ifdef ESYS_MPI
883 PMPIO_HandOffBaton(baton, dbfile);
884 PMPIO_Finish(baton);
885 #endif
886 } else {
887 DBClose(dbfile);
888 }
889
890 #else // USE_SILO
891 throw RipleyException("dump: no Silo support");
892 #endif
893 }
894
895 const int* Brick::borrowSampleReferenceIDs(int fsType) const
896 {
897 switch (fsType) {
898 case Nodes:
899 case ReducedNodes: //FIXME: reduced
900 return &m_nodeId[0];
901 case DegreesOfFreedom:
902 case ReducedDegreesOfFreedom: //FIXME: reduced
903 return &m_dofId[0];
904 case Elements:
905 case ReducedElements:
906 return &m_elementId[0];
907 case FaceElements:
908 case ReducedFaceElements:
909 return &m_faceId[0];
910 case Points:
911 return &m_diracPointNodeIDs[0];
912 default:
913 break;
914 }
915
916 stringstream msg;
917 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
918 throw RipleyException(msg.str());
919 }
920
921 bool Brick::ownSample(int fsType, index_t id) const
922 {
923 if (getMPISize()==1)
924 return true;
925
926 switch (fsType) {
927 case Nodes:
928 case ReducedNodes: //FIXME: reduced
929 return (m_dofMap[id] < getNumDOF());
930 case DegreesOfFreedom:
931 case ReducedDegreesOfFreedom:
932 return true;
933 case Elements:
934 case ReducedElements:
935 {
936 // check ownership of element's _last_ node
937 const index_t x=id%m_NE[0] + 1;
938 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
939 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
940 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
941 }
942 case FaceElements:
943 case ReducedFaceElements:
944 {
945 // check ownership of face element's last node
946 dim_t n=0;
947 for (size_t i=0; i<6; i++) {
948 n+=m_faceCount[i];
949 if (id<n) {
950 const index_t j=id-n+m_faceCount[i];
951 if (i>=4) { // front or back
952 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
953 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
954 } else if (i>=2) { // bottom or top
955 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
956 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
957 } else { // left or right
958 const index_t first=(i==0 ? 0 : m_NN[0]-1);
959 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
960 }
961 }
962 }
963 return false;
964 }
965 default:
966 break;
967 }
968
969 stringstream msg;
970 msg << "ownSample: invalid function space type " << fsType;
971 throw RipleyException(msg.str());
972 }
973
974 void Brick::setToNormal(escript::Data& out) const
975 {
976 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
977 out.requireWrite();
978 #pragma omp parallel
979 {
980 if (m_faceOffset[0] > -1) {
981 #pragma omp for nowait
982 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
983 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
984 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
985 // set vector at four quadrature points
986 *o++ = -1.; *o++ = 0.; *o++ = 0.;
987 *o++ = -1.; *o++ = 0.; *o++ = 0.;
988 *o++ = -1.; *o++ = 0.; *o++ = 0.;
989 *o++ = -1.; *o++ = 0.; *o = 0.;
990 }
991 }
992 }
993
994 if (m_faceOffset[1] > -1) {
995 #pragma omp for nowait
996 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
997 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
998 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
999 // set vector at four quadrature points
1000 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1001 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1002 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1003 *o++ = 1.; *o++ = 0.; *o = 0.;
1004 }
1005 }
1006 }
1007
1008 if (m_faceOffset[2] > -1) {
1009 #pragma omp for nowait
1010 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1011 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1012 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1013 // set vector at four quadrature points
1014 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1015 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1016 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1017 *o++ = 0.; *o++ = -1.; *o = 0.;
1018 }
1019 }
1020 }
1021
1022 if (m_faceOffset[3] > -1) {
1023 #pragma omp for nowait
1024 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1025 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1026 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1027 // set vector at four quadrature points
1028 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1029 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1030 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1031 *o++ = 0.; *o++ = 1.; *o = 0.;
1032 }
1033 }
1034 }
1035
1036 if (m_faceOffset[4] > -1) {
1037 #pragma omp for nowait
1038 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1039 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1040 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1041 // set vector at four quadrature points
1042 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1043 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1044 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1045 *o++ = 0.; *o++ = 0.; *o = -1.;
1046 }
1047 }
1048 }
1049
1050 if (m_faceOffset[5] > -1) {
1051 #pragma omp for nowait
1052 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1053 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1054 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1055 // set vector at four quadrature points
1056 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1057 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1058 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1059 *o++ = 0.; *o++ = 0.; *o = 1.;
1060 }
1061 }
1062 }
1063 } // end of parallel section
1064 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1065 out.requireWrite();
1066 #pragma omp parallel
1067 {
1068 if (m_faceOffset[0] > -1) {
1069 #pragma omp for nowait
1070 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1071 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1072 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1073 *o++ = -1.;
1074 *o++ = 0.;
1075 *o = 0.;
1076 }
1077 }
1078 }
1079
1080 if (m_faceOffset[1] > -1) {
1081 #pragma omp for nowait
1082 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1083 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1084 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1085 *o++ = 1.;
1086 *o++ = 0.;
1087 *o = 0.;
1088 }
1089 }
1090 }
1091
1092 if (m_faceOffset[2] > -1) {
1093 #pragma omp for nowait
1094 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1095 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1096 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1097 *o++ = 0.;
1098 *o++ = -1.;
1099 *o = 0.;
1100 }
1101 }
1102 }
1103
1104 if (m_faceOffset[3] > -1) {
1105 #pragma omp for nowait
1106 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1107 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1108 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1109 *o++ = 0.;
1110 *o++ = 1.;
1111 *o = 0.;
1112 }
1113 }
1114 }
1115
1116 if (m_faceOffset[4] > -1) {
1117 #pragma omp for nowait
1118 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1119 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1120 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1121 *o++ = 0.;
1122 *o++ = 0.;
1123 *o = -1.;
1124 }
1125 }
1126 }
1127
1128 if (m_faceOffset[5] > -1) {
1129 #pragma omp for nowait
1130 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1131 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1132 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1133 *o++ = 0.;
1134 *o++ = 0.;
1135 *o = 1.;
1136 }
1137 }
1138 }
1139 } // end of parallel section
1140
1141 } else {
1142 stringstream msg;
1143 msg << "setToNormal: invalid function space type "
1144 << out.getFunctionSpace().getTypeCode();
1145 throw RipleyException(msg.str());
1146 }
1147 }
1148
1149 void Brick::setToSize(escript::Data& out) const
1150 {
1151 if (out.getFunctionSpace().getTypeCode() == Elements
1152 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1153 out.requireWrite();
1154 const dim_t numQuad=out.getNumDataPointsPerSample();
1155 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1156 #pragma omp parallel for
1157 for (index_t k = 0; k < getNumElements(); ++k) {
1158 double* o = out.getSampleDataRW(k);
1159 fill(o, o+numQuad, size);
1160 }
1161 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1162 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1163 out.requireWrite();
1164 const dim_t numQuad=out.getNumDataPointsPerSample();
1165 #pragma omp parallel
1166 {
1167 if (m_faceOffset[0] > -1) {
1168 const double size=min(m_dx[1],m_dx[2]);
1169 #pragma omp for nowait
1170 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1171 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1172 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1173 fill(o, o+numQuad, size);
1174 }
1175 }
1176 }
1177
1178 if (m_faceOffset[1] > -1) {
1179 const double size=min(m_dx[1],m_dx[2]);
1180 #pragma omp for nowait
1181 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1182 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1183 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1184 fill(o, o+numQuad, size);
1185 }
1186 }
1187 }
1188
1189 if (m_faceOffset[2] > -1) {
1190 const double size=min(m_dx[0],m_dx[2]);
1191 #pragma omp for nowait
1192 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1193 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1194 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1195 fill(o, o+numQuad, size);
1196 }
1197 }
1198 }
1199
1200 if (m_faceOffset[3] > -1) {
1201 const double size=min(m_dx[0],m_dx[2]);
1202 #pragma omp for nowait
1203 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1204 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1205 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1206 fill(o, o+numQuad, size);
1207 }
1208 }
1209 }
1210
1211 if (m_faceOffset[4] > -1) {
1212 const double size=min(m_dx[0],m_dx[1]);
1213 #pragma omp for nowait
1214 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1215 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1216 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1217 fill(o, o+numQuad, size);
1218 }
1219 }
1220 }
1221
1222 if (m_faceOffset[5] > -1) {
1223 const double size=min(m_dx[0],m_dx[1]);
1224 #pragma omp for nowait
1225 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1226 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1227 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1228 fill(o, o+numQuad, size);
1229 }
1230 }
1231 }
1232 } // end of parallel section
1233
1234 } else {
1235 stringstream msg;
1236 msg << "setToSize: invalid function space type "
1237 << out.getFunctionSpace().getTypeCode();
1238 throw RipleyException(msg.str());
1239 }
1240 }
1241
1242 void Brick::Print_Mesh_Info(const bool full) const
1243 {
1244 RipleyDomain::Print_Mesh_Info(full);
1245 if (full) {
1246 cout << " Id Coordinates" << endl;
1247 cout.precision(15);
1248 cout.setf(ios::scientific, ios::floatfield);
1249 for (index_t i=0; i < getNumNodes(); i++) {
1250 cout << " " << setw(5) << m_nodeId[i]
1251 << " " << getLocalCoordinate(i%m_NN[0], 0)
1252 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1253 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1254 }
1255 }
1256 }
1257
1258
1259 //protected
1260 void Brick::assembleCoordinates(escript::Data& arg) const
1261 {
1262 escriptDataC x = arg.getDataC();
1263 int numDim = m_numDim;
1264 if (!isDataPointShapeEqual(&x, 1, &numDim))
1265 throw RipleyException("setToX: Invalid Data object shape");
1266 if (!numSamplesEqual(&x, 1, getNumNodes()))
1267 throw RipleyException("setToX: Illegal number of samples in Data object");
1268
1269 arg.requireWrite();
1270 #pragma omp parallel for
1271 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1272 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1273 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1274 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1275 point[0] = getLocalCoordinate(i0, 0);
1276 point[1] = getLocalCoordinate(i1, 1);
1277 point[2] = getLocalCoordinate(i2, 2);
1278 }
1279 }
1280 }
1281 }
1282
1283 //protected
1284 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1285 {
1286 const dim_t numComp = in.getDataPointSize();
1287 const double C0 = .044658198738520451079;
1288 const double C1 = .16666666666666666667;
1289 const double C2 = .21132486540518711775;
1290 const double C3 = .25;
1291 const double C4 = .5;
1292 const double C5 = .62200846792814621559;
1293 const double C6 = .78867513459481288225;
1294
1295 if (out.getFunctionSpace().getTypeCode() == Elements) {
1296 out.requireWrite();
1297 #pragma omp parallel
1298 {
1299 vector<double> f_000(numComp);
1300 vector<double> f_001(numComp);
1301 vector<double> f_010(numComp);
1302 vector<double> f_011(numComp);
1303 vector<double> f_100(numComp);
1304 vector<double> f_101(numComp);
1305 vector<double> f_110(numComp);
1306 vector<double> f_111(numComp);
1307 #pragma omp for
1308 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1309 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1310 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1311 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1312 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1313 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1314 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1315 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1316 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1317 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1318 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1319 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1320 for (index_t i=0; i < numComp; ++i) {
1321 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1322 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1323 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1324 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1325 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1326 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1327 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1328 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1329 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1330 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1331 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1332 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1333 o[INDEX3(i,0,0,numComp,3)] = V0;
1334 o[INDEX3(i,1,0,numComp,3)] = V4;
1335 o[INDEX3(i,2,0,numComp,3)] = V8;
1336 o[INDEX3(i,0,1,numComp,3)] = V0;
1337 o[INDEX3(i,1,1,numComp,3)] = V5;
1338 o[INDEX3(i,2,1,numComp,3)] = V9;
1339 o[INDEX3(i,0,2,numComp,3)] = V1;
1340 o[INDEX3(i,1,2,numComp,3)] = V4;
1341 o[INDEX3(i,2,2,numComp,3)] = V10;
1342 o[INDEX3(i,0,3,numComp,3)] = V1;
1343 o[INDEX3(i,1,3,numComp,3)] = V5;
1344 o[INDEX3(i,2,3,numComp,3)] = V11;
1345 o[INDEX3(i,0,4,numComp,3)] = V2;
1346 o[INDEX3(i,1,4,numComp,3)] = V6;
1347 o[INDEX3(i,2,4,numComp,3)] = V8;
1348 o[INDEX3(i,0,5,numComp,3)] = V2;
1349 o[INDEX3(i,1,5,numComp,3)] = V7;
1350 o[INDEX3(i,2,5,numComp,3)] = V9;
1351 o[INDEX3(i,0,6,numComp,3)] = V3;
1352 o[INDEX3(i,1,6,numComp,3)] = V6;
1353 o[INDEX3(i,2,6,numComp,3)] = V10;
1354 o[INDEX3(i,0,7,numComp,3)] = V3;
1355 o[INDEX3(i,1,7,numComp,3)] = V7;
1356 o[INDEX3(i,2,7,numComp,3)] = V11;
1357 } // end of component loop i
1358 } // end of k0 loop
1359 } // end of k1 loop
1360 } // end of k2 loop
1361 } // end of parallel section
1362 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1363 out.requireWrite();
1364 #pragma omp parallel
1365 {
1366 vector<double> f_000(numComp);
1367 vector<double> f_001(numComp);
1368 vector<double> f_010(numComp);
1369 vector<double> f_011(numComp);
1370 vector<double> f_100(numComp);
1371 vector<double> f_101(numComp);
1372 vector<double> f_110(numComp);
1373 vector<double> f_111(numComp);
1374 #pragma omp for
1375 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1376 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1377 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1378 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1379 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1380 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1381 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1382 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1383 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1384 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1385 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1386 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1387 for (index_t i=0; i < numComp; ++i) {
1388 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1389 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1390 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1391 } // end of component loop i
1392 } // end of k0 loop
1393 } // end of k1 loop
1394 } // end of k2 loop
1395 } // end of parallel section
1396 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1397 out.requireWrite();
1398 #pragma omp parallel
1399 {
1400 vector<double> f_000(numComp);
1401 vector<double> f_001(numComp);
1402 vector<double> f_010(numComp);
1403 vector<double> f_011(numComp);
1404 vector<double> f_100(numComp);
1405 vector<double> f_101(numComp);
1406 vector<double> f_110(numComp);
1407 vector<double> f_111(numComp);
1408 if (m_faceOffset[0] > -1) {
1409 #pragma omp for nowait
1410 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1411 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1412 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1413 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1414 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1415 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1416 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1417 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1418 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1419 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1420 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1421 for (index_t i=0; i < numComp; ++i) {
1422 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1423 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1424 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1425 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1426 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1427 o[INDEX3(i,1,0,numComp,3)] = V0;
1428 o[INDEX3(i,2,0,numComp,3)] = V2;
1429 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1430 o[INDEX3(i,1,1,numComp,3)] = V0;
1431 o[INDEX3(i,2,1,numComp,3)] = V3;
1432 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1433 o[INDEX3(i,1,2,numComp,3)] = V1;
1434 o[INDEX3(i,2,2,numComp,3)] = V2;
1435 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1436 o[INDEX3(i,1,3,numComp,3)] = V1;
1437 o[INDEX3(i,2,3,numComp,3)] = V3;
1438 } // end of component loop i
1439 } // end of k1 loop
1440 } // end of k2 loop
1441 } // end of face 0
1442 if (m_faceOffset[1] > -1) {
1443 #pragma omp for nowait
1444 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1445 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1446 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1447 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1448 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1449 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1450 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1451 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1452 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1453 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1454 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1455 for (index_t i=0; i < numComp; ++i) {
1456 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1457 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1458 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1459 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1460 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1461 o[INDEX3(i,1,0,numComp,3)] = V0;
1462 o[INDEX3(i,2,0,numComp,3)] = V2;
1463 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1464 o[INDEX3(i,1,1,numComp,3)] = V0;
1465 o[INDEX3(i,2,1,numComp,3)] = V3;
1466 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1467 o[INDEX3(i,1,2,numComp,3)] = V1;
1468 o[INDEX3(i,2,2,numComp,3)] = V2;
1469 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1470 o[INDEX3(i,1,3,numComp,3)] = V1;
1471 o[INDEX3(i,2,3,numComp,3)] = V3;
1472 } // end of component loop i
1473 } // end of k1 loop
1474 } // end of k2 loop
1475 } // end of face 1
1476 if (m_faceOffset[2] > -1) {
1477 #pragma omp for nowait
1478 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1479 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1480 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1481 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1482 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1483 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1484 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1485 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1486 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1487 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1488 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1489 for (index_t i=0; i < numComp; ++i) {
1490 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1491 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1492 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1493 o[INDEX3(i,0,0,numComp,3)] = V0;
1494 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1495 o[INDEX3(i,2,0,numComp,3)] = V1;
1496 o[INDEX3(i,0,1,numComp,3)] = V0;
1497 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1498 o[INDEX3(i,2,1,numComp,3)] = V2;
1499 o[INDEX3(i,0,2,numComp,3)] = V0;
1500 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1501 o[INDEX3(i,2,2,numComp,3)] = V1;
1502 o[INDEX3(i,0,3,numComp,3)] = V0;
1503 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1504 o[INDEX3(i,2,3,numComp,3)] = V2;
1505 } // end of component loop i
1506 } // end of k0 loop
1507 } // end of k2 loop
1508 } // end of face 2
1509 if (m_faceOffset[3] > -1) {
1510 #pragma omp for nowait
1511 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1512 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1513 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1517 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1520 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1521 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1522 for (index_t i=0; i < numComp; ++i) {
1523 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1524 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1525 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1526 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1527 o[INDEX3(i,0,0,numComp,3)] = V0;
1528 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1529 o[INDEX3(i,2,0,numComp,3)] = V2;
1530 o[INDEX3(i,0,1,numComp,3)] = V0;
1531 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1532 o[INDEX3(i,2,1,numComp,3)] = V3;
1533 o[INDEX3(i,0,2,numComp,3)] = V1;
1534 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1535 o[INDEX3(i,2,2,numComp,3)] = V2;
1536 o[INDEX3(i,0,3,numComp,3)] = V1;
1537 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1538 o[INDEX3(i,2,3,numComp,3)] = V3;
1539 } // end of component loop i
1540 } // end of k0 loop
1541 } // end of k2 loop
1542 } // end of face 3
1543 if (m_faceOffset[4] > -1) {
1544 #pragma omp for nowait
1545 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1546 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1547 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1549 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1550 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1551 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1552 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1553 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1554 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1555 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1556 for (index_t i=0; i < numComp; ++i) {
1557 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1558 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1559 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1560 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1561 o[INDEX3(i,0,0,numComp,3)] = V0;
1562 o[INDEX3(i,1,0,numComp,3)] = V2;
1563 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1564 o[INDEX3(i,0,1,numComp,3)] = V0;
1565 o[INDEX3(i,1,1,numComp,3)] = V3;
1566 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1567 o[INDEX3(i,0,2,numComp,3)] = V1;
1568 o[INDEX3(i,1,2,numComp,3)] = V2;
1569 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1570 o[INDEX3(i,0,3,numComp,3)] = V1;
1571 o[INDEX3(i,1,3,numComp,3)] = V3;
1572 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1573 } // end of component loop i
1574 } // end of k0 loop
1575 } // end of k1 loop
1576 } // end of face 4
1577 if (m_faceOffset[5] > -1) {
1578 #pragma omp for nowait
1579 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1580 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1581 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1583 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1584 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1585 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1586 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1587 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1588 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1589 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1590 for (index_t i=0; i < numComp; ++i) {
1591 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1592 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1593 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1594 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1595 o[INDEX3(i,0,0,numComp,3)] = V0;
1596 o[INDEX3(i,1,0,numComp,3)] = V2;
1597 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1598 o[INDEX3(i,0,1,numComp,3)] = V0;
1599 o[INDEX3(i,1,1,numComp,3)] = V3;
1600 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1601 o[INDEX3(i,0,2,numComp,3)] = V1;
1602 o[INDEX3(i,1,2,numComp,3)] = V2;
1603 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1604 o[INDEX3(i,0,3,numComp,3)] = V1;
1605 o[INDEX3(i,1,3,numComp,3)] = V3;
1606 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1607 } // end of component loop i
1608 } // end of k0 loop
1609 } // end of k1 loop
1610 } // end of face 5
1611 } // end of parallel section
1612 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1613 out.requireWrite();
1614 #pragma omp parallel
1615 {
1616 vector<double> f_000(numComp);
1617 vector<double> f_001(numComp);
1618 vector<double> f_010(numComp);
1619 vector<double> f_011(numComp);
1620 vector<double> f_100(numComp);
1621 vector<double> f_101(numComp);
1622 vector<double> f_110(numComp);
1623 vector<double> f_111(numComp);
1624 if (m_faceOffset[0] > -1) {
1625 #pragma omp for nowait
1626 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1627 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1628 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1629 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1630 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1631 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1632 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1633 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1634 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1635 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1636 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1637 for (index_t i=0; i < numComp; ++i) {
1638 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1639 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1640 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1641 } // end of component loop i
1642 } // end of k1 loop
1643 } // end of k2 loop
1644 } // end of face 0
1645 if (m_faceOffset[1] > -1) {
1646 #pragma omp for nowait
1647 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1648 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1649 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1650 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1651 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1652 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1653 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1654 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1655 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1656 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1657 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1658 for (index_t i=0; i < numComp; ++i) {
1659 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1660 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1661 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1662 } // end of component loop i
1663 } // end of k1 loop
1664 } // end of k2 loop
1665 } // end of face 1
1666 if (m_faceOffset[2] > -1) {
1667 #pragma omp for nowait
1668 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1669 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1670 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1671 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1672 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1673 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1674 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1675 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1676 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1677 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1678 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1679 for (index_t i=0; i < numComp; ++i) {
1680 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1681 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1682 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1683 } // end of component loop i
1684 } // end of k0 loop
1685 } // end of k2 loop
1686 } // end of face 2
1687 if (m_faceOffset[3] > -1) {
1688 #pragma omp for nowait
1689 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1690 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1691 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1692 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1693 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1694 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1695 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1696 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1697 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1698 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1699 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1700 for (index_t i=0; i < numComp; ++i) {
1701 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1702 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1703 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1704 } // end of component loop i
1705 } // end of k0 loop
1706 } // end of k2 loop
1707 } // end of face 3
1708 if (m_faceOffset[4] > -1) {
1709 #pragma omp for nowait
1710 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1711 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1712 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1713 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1714 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1715 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1716 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1717 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1718 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1721 for (index_t i=0; i < numComp; ++i) {
1722 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1723 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1724 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1725 } // end of component loop i
1726 } // end of k0 loop
1727 } // end of k1 loop
1728 } // end of face 4
1729 if (m_faceOffset[5] > -1) {
1730 #pragma omp for nowait
1731 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1732 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1733 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1734 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1735 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1736 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1737 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1738 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1739 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1740 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1741 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1742 for (index_t i=0; i < numComp; ++i) {
1743 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1744 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1745 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1746 } // end of component loop i
1747 } // end of k0 loop
1748 } // end of k1 loop
1749 } // end of face 5
1750 } // end of parallel section
1751 }
1752 }
1753
1754 //protected
1755 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1756 {
1757 const dim_t numComp = arg.getDataPointSize();
1758 const index_t left = (m_offset[0]==0 ? 0 : 1);
1759 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1760 const index_t front = (m_offset[2]==0 ? 0 : 1);
1761 const int fs = arg.getFunctionSpace().getTypeCode();
1762 if (fs == Elements && arg.actsExpanded()) {
1763 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1764 #pragma omp parallel
1765 {
1766 vector<double> int_local(numComp, 0);
1767 #pragma omp for nowait
1768 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1769 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1770 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1771 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1772 for (index_t i=0; i < numComp; ++i) {
1773 const double f_0 = f[INDEX2(i,0,numComp)];
1774 const double f_1 = f[INDEX2(i,1,numComp)];
1775 const double f_2 = f[INDEX2(i,2,numComp)];
1776 const double f_3 = f[INDEX2(i,3,numComp)];
1777 const double f_4 = f[INDEX2(i,4,numComp)];
1778 const double f_5 = f[INDEX2(i,5,numComp)];
1779 const double f_6 = f[INDEX2(i,6,numComp)];
1780 const double f_7 = f[INDEX2(i,7,numComp)];
1781 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1782 } // end of component loop i
1783 } // end of k0 loop
1784 } // end of k1 loop
1785 } // end of k2 loop
1786
1787 #pragma omp critical
1788 for (index_t i=0; i<numComp; i++)
1789 integrals[i]+=int_local[i];
1790 } // end of parallel section
1791
1792 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1793 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1794 #pragma omp parallel
1795 {
1796 vector<double> int_local(numComp, 0);
1797 #pragma omp for nowait
1798 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1799 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1800 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1801 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1802 for (index_t i=0; i < numComp; ++i) {
1803 int_local[i]+=f[i]*w_0;
1804 } // end of component loop i
1805 } // end of k0 loop
1806 } // end of k1 loop
1807 } // end of k2 loop
1808
1809 #pragma omp critical
1810 for (index_t i=0; i<numComp; i++)
1811 integrals[i]+=int_local[i];
1812 } // end of parallel section
1813
1814 } else if (fs == FaceElements && arg.actsExpanded()) {
1815 const double w_0 = m_dx[1]*m_dx[2]/4.;
1816 const double w_1 = m_dx[0]*m_dx[2]/4.;
1817 const double w_2 = m_dx[0]*m_dx[1]/4.;
1818 #pragma omp parallel
1819 {
1820 vector<double> int_local(numComp, 0);
1821 if (m_faceOffset[0] > -1) {
1822 #pragma omp for nowait
1823 for (index_t k2 = fro