/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4941 - (show annotations)
Thu May 15 01:49:48 2014 UTC (4 years, 11 months ago) by caltinay
File size: 164926 byte(s)
first proof of concept - self-contained ripley system matrix with diagonal
storage, CG solver, no preconditioner, no MPI, no blocks, quite a few
quick'n'dirty hacks.
Solves poisson faster than paso :-)


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <limits>
18
19 #include <ripley/Brick.h>
20 #include <esysUtils/esysFileWriter.h>
21 #include <ripley/DefaultAssembler3D.h>
22 #include <ripley/WaveAssembler3D.h>
23 #include <ripley/LameAssembler3D.h>
24 #include <ripley/domainhelpers.h>
25 #include <boost/scoped_array.hpp>
26
27 #ifdef USE_NETCDF
28 #include <netcdfcpp.h>
29 #endif
30
31 #if USE_SILO
32 #include <silo.h>
33 #ifdef ESYS_MPI
34 #include <pmpio.h>
35 #endif
36 #endif
37
38 #include <iomanip>
39
40 #include "esysUtils/EsysRandom.h"
41 #include "blocktools.h"
42
43
44 using namespace std;
45 using esysUtils::FileWriter;
46
47 namespace ripley {
48
49 int indexOfMax(int a, int b, int c) {
50 if (a > b) {
51 if (c > a) {
52 return 2;
53 }
54 return 0;
55 } else if (b > c) {
56 return 1;
57 }
58 return 2;
59 }
60
61 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
62 double x1, double y1, double z1, int d0, int d1, int d2,
63 const std::vector<double>& points, const std::vector<int>& tags,
64 const simap_t& tagnamestonums,
65 escript::SubWorld_ptr w) :
66 RipleyDomain(3, w)
67 {
68 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
69 * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
70 throw RipleyException("The number of elements has overflowed, this "
71 "limit may be raised in future releases.");
72
73 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
74 throw RipleyException("Number of elements in each spatial dimension "
75 "must be positive");
76
77 // ignore subdivision parameters for serial run
78 if (m_mpiInfo->size == 1) {
79 d0=1;
80 d1=1;
81 d2=1;
82 }
83 bool warn=false;
84
85 std::vector<int> factors;
86 int ranks = m_mpiInfo->size;
87 int epr[3] = {n0,n1,n2};
88 int d[3] = {d0,d1,d2};
89 if (d0<=0 || d1<=0 || d2<=0) {
90 for (int i = 0; i < 3; i++) {
91 if (d[i] < 1) {
92 d[i] = 1;
93 continue;
94 }
95 epr[i] = -1; // can no longer be max
96 if (ranks % d[i] != 0) {
97 throw RipleyException("Invalid number of spatial subdivisions");
98 }
99 //remove
100 ranks /= d[i];
101 }
102 factorise(factors, ranks);
103 if (factors.size() != 0) {
104 warn = true;
105 }
106 }
107 while (factors.size() > 0) {
108 int i = indexOfMax(epr[0],epr[1],epr[2]);
109 int f = factors.back();
110 factors.pop_back();
111 d[i] *= f;
112 epr[i] /= f;
113 }
114 d0 = d[0]; d1 = d[1]; d2 = d[2];
115
116 // ensure number of subdivisions is valid and nodes can be distributed
117 // among number of ranks
118 if (d0*d1*d2 != m_mpiInfo->size){
119 throw RipleyException("Invalid number of spatial subdivisions");
120 }
121 if (warn) {
122 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
123 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
124 }
125
126 double l0 = x1-x0;
127 double l1 = y1-y0;
128 double l2 = z1-z0;
129 m_dx[0] = l0/n0;
130 m_dx[1] = l1/n1;
131 m_dx[2] = l2/n2;
132
133 if ((n0+1)%d0 > 0) {
134 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
135 l0=m_dx[0]*n0;
136 cout << "Warning: Adjusted number of elements and length. N0="
137 << n0 << ", l0=" << l0 << endl;
138 }
139 if ((n1+1)%d1 > 0) {
140 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
141 l1=m_dx[1]*n1;
142 cout << "Warning: Adjusted number of elements and length. N1="
143 << n1 << ", l1=" << l1 << endl;
144 }
145 if ((n2+1)%d2 > 0) {
146 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
147 l2=m_dx[2]*n2;
148 cout << "Warning: Adjusted number of elements and length. N2="
149 << n2 << ", l2=" << l2 << endl;
150 }
151
152 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
153 throw RipleyException("Too few elements for the number of ranks");
154
155 m_gNE[0] = n0;
156 m_gNE[1] = n1;
157 m_gNE[2] = n2;
158 m_origin[0] = x0;
159 m_origin[1] = y0;
160 m_origin[2] = z0;
161 m_length[0] = l0;
162 m_length[1] = l1;
163 m_length[2] = l2;
164 m_NX[0] = d0;
165 m_NX[1] = d1;
166 m_NX[2] = d2;
167
168 // local number of elements (including overlap)
169 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
170 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
171 m_NE[0]++;
172 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
173 m_ownNE[0]--;
174
175 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
176 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
177 m_NE[1]++;
178 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
179 m_ownNE[1]--;
180
181 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
182 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
183 m_NE[2]++;
184 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
185 m_ownNE[2]--;
186
187 // local number of nodes
188 m_NN[0] = m_NE[0]+1;
189 m_NN[1] = m_NE[1]+1;
190 m_NN[2] = m_NE[2]+1;
191
192 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
193 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
194 if (m_offset[0] > 0)
195 m_offset[0]--;
196 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
197 if (m_offset[1] > 0)
198 m_offset[1]--;
199 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
200 if (m_offset[2] > 0)
201 m_offset[2]--;
202
203 populateSampleIds();
204 createPattern();
205
206 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
207 for (map<string, int>::const_iterator i = tagnamestonums.begin();
208 i != tagnamestonums.end(); i++) {
209 setTagMap(i->first, i->second);
210 }
211 addPoints(tags.size(), &points[0], &tags[0]);
212 }
213
214
215 Brick::~Brick()
216 {
217 delete assembler;
218 }
219
220 string Brick::getDescription() const
221 {
222 return "ripley::Brick";
223 }
224
225 bool Brick::operator==(const AbstractDomain& other) const
226 {
227 const Brick* o=dynamic_cast<const Brick*>(&other);
228 if (o) {
229 return (RipleyDomain::operator==(other) &&
230 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
231 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
232 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
233 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
234 }
235
236 return false;
237 }
238
239 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
240 const ReaderParameters& params) const
241 {
242 #ifdef USE_NETCDF
243 // check destination function space
244 int myN0, myN1, myN2;
245 if (out.getFunctionSpace().getTypeCode() == Nodes) {
246 myN0 = m_NN[0];
247 myN1 = m_NN[1];
248 myN2 = m_NN[2];
249 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
250 out.getFunctionSpace().getTypeCode() == ReducedElements) {
251 myN0 = m_NE[0];
252 myN1 = m_NE[1];
253 myN2 = m_NE[2];
254 } else
255 throw RipleyException("readNcGrid(): invalid function space for output data object");
256
257 if (params.first.size() != 3)
258 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
259
260 if (params.numValues.size() != 3)
261 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
262
263 if (params.multiplier.size() != 3)
264 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
265 for (size_t i=0; i<params.multiplier.size(); i++)
266 if (params.multiplier[i]<1)
267 throw RipleyException("readNcGrid(): all multipliers must be positive");
268
269 // check file existence and size
270 NcFile f(filename.c_str(), NcFile::ReadOnly);
271 if (!f.is_valid())
272 throw RipleyException("readNcGrid(): cannot open file");
273
274 NcVar* var = f.get_var(varname.c_str());
275 if (!var)
276 throw RipleyException("readNcGrid(): invalid variable name");
277
278 // TODO: rank>0 data support
279 const int numComp = out.getDataPointSize();
280 if (numComp > 1)
281 throw RipleyException("readNcGrid(): only scalar data supported");
282
283 const int dims = var->num_dims();
284 boost::scoped_array<long> edges(var->edges());
285
286 // is this a slice of the data object (dims!=3)?
287 // note the expected ordering of edges (as in numpy: z,y,x)
288 if ( (dims==3 && (params.numValues[2] > edges[0] ||
289 params.numValues[1] > edges[1] ||
290 params.numValues[0] > edges[2]))
291 || (dims==2 && params.numValues[2]>1)
292 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
293 throw RipleyException("readNcGrid(): not enough data in file");
294 }
295
296 // check if this rank contributes anything
297 if (params.first[0] >= m_offset[0]+myN0 ||
298 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
299 params.first[1] >= m_offset[1]+myN1 ||
300 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
301 params.first[2] >= m_offset[2]+myN2 ||
302 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
303 return;
304 }
305
306 // now determine how much this rank has to write
307
308 // first coordinates in data object to write to
309 const int first0 = max(0, params.first[0]-m_offset[0]);
310 const int first1 = max(0, params.first[1]-m_offset[1]);
311 const int first2 = max(0, params.first[2]-m_offset[2]);
312 // indices to first value in file (not accounting for reverse yet)
313 int idx0 = max(0, m_offset[0]-params.first[0]);
314 int idx1 = max(0, m_offset[1]-params.first[1]);
315 int idx2 = max(0, m_offset[2]-params.first[2]);
316 // number of values to read
317 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
318 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
319 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
320
321 // make sure we read the right block if going backwards through file
322 if (params.reverse[0])
323 idx0 = edges[dims-1]-num0-idx0;
324 if (dims>1 && params.reverse[1])
325 idx1 = edges[dims-2]-num1-idx1;
326 if (dims>2 && params.reverse[2])
327 idx2 = edges[dims-3]-num2-idx2;
328
329
330 vector<double> values(num0*num1*num2);
331 if (dims==3) {
332 var->set_cur(idx2, idx1, idx0);
333 var->get(&values[0], num2, num1, num0);
334 } else if (dims==2) {
335 var->set_cur(idx1, idx0);
336 var->get(&values[0], num1, num0);
337 } else {
338 var->set_cur(idx0);
339 var->get(&values[0], num0);
340 }
341
342 const int dpp = out.getNumDataPointsPerSample();
343 out.requireWrite();
344
345 // helpers for reversing
346 const int x0 = (params.reverse[0] ? num0-1 : 0);
347 const int x_mult = (params.reverse[0] ? -1 : 1);
348 const int y0 = (params.reverse[1] ? num1-1 : 0);
349 const int y_mult = (params.reverse[1] ? -1 : 1);
350 const int z0 = (params.reverse[2] ? num2-1 : 0);
351 const int z_mult = (params.reverse[2] ? -1 : 1);
352
353 for (index_t z=0; z<num2; z++) {
354 for (index_t y=0; y<num1; y++) {
355 #pragma omp parallel for
356 for (index_t x=0; x<num0; x++) {
357 const int baseIndex = first0+x*params.multiplier[0]
358 +(first1+y*params.multiplier[1])*myN0
359 +(first2+z*params.multiplier[2])*myN0*myN1;
360 const int srcIndex=(z0+z_mult*z)*num1*num0
361 +(y0+y_mult*y)*num0
362 +(x0+x_mult*x);
363 if (!isnan(values[srcIndex])) {
364 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
365 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
366 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
367 const int dataIndex = baseIndex+m0
368 +m1*myN0
369 +m2*myN0*myN1;
370 double* dest = out.getSampleDataRW(dataIndex);
371 for (index_t q=0; q<dpp; q++) {
372 *dest++ = values[srcIndex];
373 }
374 }
375 }
376 }
377 }
378 }
379 }
380 }
381 #else
382 throw RipleyException("readNcGrid(): not compiled with netCDF support");
383 #endif
384 }
385
386 #ifdef USE_BOOSTIO
387 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
388 const ReaderParameters& params) const
389 {
390 // the mapping is not universally correct but should work on our
391 // supported platforms
392 switch (params.dataType) {
393 case DATATYPE_INT32:
394 readBinaryGridZippedImpl<int>(out, filename, params);
395 break;
396 case DATATYPE_FLOAT32:
397 readBinaryGridZippedImpl<float>(out, filename, params);
398 break;
399 case DATATYPE_FLOAT64:
400 readBinaryGridZippedImpl<double>(out, filename, params);
401 break;
402 default:
403 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
404 }
405 }
406 #endif
407
408 void Brick::readBinaryGrid(escript::Data& out, string filename,
409 const ReaderParameters& params) const
410 {
411 // the mapping is not universally correct but should work on our
412 // supported platforms
413 switch (params.dataType) {
414 case DATATYPE_INT32:
415 readBinaryGridImpl<int>(out, filename, params);
416 break;
417 case DATATYPE_FLOAT32:
418 readBinaryGridImpl<float>(out, filename, params);
419 break;
420 case DATATYPE_FLOAT64:
421 readBinaryGridImpl<double>(out, filename, params);
422 break;
423 default:
424 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
425 }
426 }
427
428 template<typename ValueType>
429 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
430 const ReaderParameters& params) const
431 {
432 // check destination function space
433 int myN0, myN1, myN2;
434 if (out.getFunctionSpace().getTypeCode() == Nodes) {
435 myN0 = m_NN[0];
436 myN1 = m_NN[1];
437 myN2 = m_NN[2];
438 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
439 out.getFunctionSpace().getTypeCode() == ReducedElements) {
440 myN0 = m_NE[0];
441 myN1 = m_NE[1];
442 myN2 = m_NE[2];
443 } else
444 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
445
446 if (params.first.size() != 3)
447 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
448
449 if (params.numValues.size() != 3)
450 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
451
452 if (params.multiplier.size() != 3)
453 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
454 for (size_t i=0; i<params.multiplier.size(); i++)
455 if (params.multiplier[i]<1)
456 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
457
458 // check file existence and size
459 ifstream f(filename.c_str(), ifstream::binary);
460 if (f.fail()) {
461 throw RipleyException("readBinaryGrid(): cannot open file");
462 }
463 f.seekg(0, ios::end);
464 const int numComp = out.getDataPointSize();
465 const int filesize = f.tellg();
466 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 if (filesize < reqsize) {
468 f.close();
469 throw RipleyException("readBinaryGrid(): not enough data in file");
470 }
471
472 // check if this rank contributes anything
473 if (params.first[0] >= m_offset[0]+myN0 ||
474 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475 params.first[1] >= m_offset[1]+myN1 ||
476 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477 params.first[2] >= m_offset[2]+myN2 ||
478 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 f.close();
480 return;
481 }
482
483 // now determine how much this rank has to write
484
485 // first coordinates in data object to write to
486 const int first0 = max(0, params.first[0]-m_offset[0]);
487 const int first1 = max(0, params.first[1]-m_offset[1]);
488 const int first2 = max(0, params.first[2]-m_offset[2]);
489 // indices to first value in file
490 const int idx0 = max(0, m_offset[0]-params.first[0]);
491 const int idx1 = max(0, m_offset[1]-params.first[1]);
492 const int idx2 = max(0, m_offset[2]-params.first[2]);
493 // number of values to read
494 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
495 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
496 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
497
498 out.requireWrite();
499 vector<ValueType> values(num0*numComp);
500 const int dpp = out.getNumDataPointsPerSample();
501
502 for (int z=0; z<num2; z++) {
503 for (int y=0; y<num1; y++) {
504 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
505 +(idx2+z)*params.numValues[0]*params.numValues[1]);
506 f.seekg(fileofs*sizeof(ValueType));
507 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
508
509 for (int x=0; x<num0; x++) {
510 const int baseIndex = first0+x*params.multiplier[0]
511 +(first1+y*params.multiplier[1])*myN0
512 +(first2+z*params.multiplier[2])*myN0*myN1;
513 for (int m2=0; m2<params.multiplier[2]; m2++) {
514 for (int m1=0; m1<params.multiplier[1]; m1++) {
515 for (int m0=0; m0<params.multiplier[0]; m0++) {
516 const int dataIndex = baseIndex+m0
517 +m1*myN0
518 +m2*myN0*myN1;
519 double* dest = out.getSampleDataRW(dataIndex);
520 for (int c=0; c<numComp; c++) {
521 ValueType val = values[x*numComp+c];
522
523 if (params.byteOrder != BYTEORDER_NATIVE) {
524 char* cval = reinterpret_cast<char*>(&val);
525 // this will alter val!!
526 byte_swap32(cval);
527 }
528 if (!std::isnan(val)) {
529 for (int q=0; q<dpp; q++) {
530 *dest++ = static_cast<double>(val);
531 }
532 }
533 }
534 }
535 }
536 }
537 }
538 }
539 }
540
541 f.close();
542 }
543
544 #ifdef USE_BOOSTIO
545 template<typename ValueType>
546 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
547 const ReaderParameters& params) const
548 {
549 // check destination function space
550 int myN0, myN1, myN2;
551 if (out.getFunctionSpace().getTypeCode() == Nodes) {
552 myN0 = m_NN[0];
553 myN1 = m_NN[1];
554 myN2 = m_NN[2];
555 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
556 out.getFunctionSpace().getTypeCode() == ReducedElements) {
557 myN0 = m_NE[0];
558 myN1 = m_NE[1];
559 myN2 = m_NE[2];
560 } else
561 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
562
563 if (params.first.size() != 3)
564 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
565
566 if (params.numValues.size() != 3)
567 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
568
569 if (params.multiplier.size() != 3)
570 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
571 for (size_t i=0; i<params.multiplier.size(); i++)
572 if (params.multiplier[i]<1)
573 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
574
575 // check file existence and size
576 ifstream f(filename.c_str(), ifstream::binary);
577 if (f.fail()) {
578 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
579 }
580 f.seekg(0, ios::end);
581 const int numComp = out.getDataPointSize();
582 int filesize = f.tellg();
583 f.seekg(0, ios::beg);
584 std::vector<char> compressed(filesize);
585 f.read((char*)&compressed[0], filesize);
586 f.close();
587 std::vector<char> decompressed = unzip(compressed);
588 filesize = decompressed.size();
589 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
590 if (filesize < reqsize) {
591 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
592 }
593
594 // check if this rank contributes anything
595 if (params.first[0] >= m_offset[0]+myN0 ||
596 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
597 params.first[1] >= m_offset[1]+myN1 ||
598 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
599 params.first[2] >= m_offset[2]+myN2 ||
600 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
601 return;
602 }
603
604 // now determine how much this rank has to write
605
606 // first coordinates in data object to write to
607 const int first0 = max(0, params.first[0]-m_offset[0]);
608 const int first1 = max(0, params.first[1]-m_offset[1]);
609 const int first2 = max(0, params.first[2]-m_offset[2]);
610 // indices to first value in file
611 const int idx0 = max(0, m_offset[0]-params.first[0]);
612 const int idx1 = max(0, m_offset[1]-params.first[1]);
613 const int idx2 = max(0, m_offset[2]-params.first[2]);
614 // number of values to read
615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
616 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
617 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
618
619 out.requireWrite();
620 vector<ValueType> values(num0*numComp);
621 const int dpp = out.getNumDataPointsPerSample();
622
623 for (int z=0; z<num2; z++) {
624 for (int y=0; y<num1; y++) {
625 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
626 +(idx2+z)*params.numValues[0]*params.numValues[1]);
627 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
628
629 for (int x=0; x<num0; x++) {
630 const int baseIndex = first0+x*params.multiplier[0]
631 +(first1+y*params.multiplier[1])*myN0
632 +(first2+z*params.multiplier[2])*myN0*myN1;
633 for (int m2=0; m2<params.multiplier[2]; m2++) {
634 for (int m1=0; m1<params.multiplier[1]; m1++) {
635 for (int m0=0; m0<params.multiplier[0]; m0++) {
636 const int dataIndex = baseIndex+m0
637 +m1*myN0
638 +m2*myN0*myN1;
639 double* dest = out.getSampleDataRW(dataIndex);
640 for (int c=0; c<numComp; c++) {
641 ValueType val = values[x*numComp+c];
642
643 if (params.byteOrder != BYTEORDER_NATIVE) {
644 char* cval = reinterpret_cast<char*>(&val);
645 // this will alter val!!
646 byte_swap32(cval);
647 }
648 if (!std::isnan(val)) {
649 for (int q=0; q<dpp; q++) {
650 *dest++ = static_cast<double>(val);
651 }
652 }
653 }
654 }
655 }
656 }
657 }
658 }
659 }
660 }
661 #endif
662
663 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
664 int byteOrder, int dataType) const
665 {
666 // the mapping is not universally correct but should work on our
667 // supported platforms
668 switch (dataType) {
669 case DATATYPE_INT32:
670 writeBinaryGridImpl<int>(in, filename, byteOrder);
671 break;
672 case DATATYPE_FLOAT32:
673 writeBinaryGridImpl<float>(in, filename, byteOrder);
674 break;
675 case DATATYPE_FLOAT64:
676 writeBinaryGridImpl<double>(in, filename, byteOrder);
677 break;
678 default:
679 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
680 }
681 }
682
683 template<typename ValueType>
684 void Brick::writeBinaryGridImpl(const escript::Data& in,
685 const string& filename, int byteOrder) const
686 {
687 // check function space and determine number of points
688 int myN0, myN1, myN2;
689 int totalN0, totalN1, totalN2;
690 if (in.getFunctionSpace().getTypeCode() == Nodes) {
691 myN0 = m_NN[0];
692 myN1 = m_NN[1];
693 myN2 = m_NN[2];
694 totalN0 = m_gNE[0]+1;
695 totalN1 = m_gNE[1]+1;
696 totalN2 = m_gNE[2]+1;
697 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
698 in.getFunctionSpace().getTypeCode() == ReducedElements) {
699 myN0 = m_NE[0];
700 myN1 = m_NE[1];
701 myN2 = m_NE[2];
702 totalN0 = m_gNE[0];
703 totalN1 = m_gNE[1];
704 totalN2 = m_gNE[2];
705 } else
706 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
707
708 const int numComp = in.getDataPointSize();
709 const int dpp = in.getNumDataPointsPerSample();
710 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
711
712 if (numComp > 1 || dpp > 1)
713 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
714
715 // from here on we know that each sample consists of one value
716 FileWriter fw;
717 fw.openFile(filename, fileSize);
718 MPIBarrier();
719
720 for (index_t z=0; z<myN2; z++) {
721 for (index_t y=0; y<myN1; y++) {
722 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
723 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
724 ostringstream oss;
725
726 for (index_t x=0; x<myN0; x++) {
727 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
728 ValueType fvalue = static_cast<ValueType>(*sample);
729 if (byteOrder == BYTEORDER_NATIVE) {
730 oss.write((char*)&fvalue, sizeof(fvalue));
731 } else {
732 char* value = reinterpret_cast<char*>(&fvalue);
733 oss.write(byte_swap32(value), sizeof(fvalue));
734 }
735 }
736 fw.writeAt(oss, fileofs);
737 }
738 }
739 fw.close();
740 }
741
742 void Brick::dump(const string& fileName) const
743 {
744 #if USE_SILO
745 string fn(fileName);
746 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
747 fn+=".silo";
748 }
749
750 int driver=DB_HDF5;
751 string siloPath;
752 DBfile* dbfile = NULL;
753
754 #ifdef ESYS_MPI
755 PMPIO_baton_t* baton = NULL;
756 const int NUM_SILO_FILES = 1;
757 const char* blockDirFmt = "/block%04d";
758 #endif
759
760 if (m_mpiInfo->size > 1) {
761 #ifdef ESYS_MPI
762 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
763 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
764 PMPIO_DefaultClose, (void*)&driver);
765 // try the fallback driver in case of error
766 if (!baton && driver != DB_PDB) {
767 driver = DB_PDB;
768 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
769 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
770 PMPIO_DefaultClose, (void*)&driver);
771 }
772 if (baton) {
773 char str[64];
774 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
775 siloPath = str;
776 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
777 }
778 #endif
779 } else {
780 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
781 getDescription().c_str(), driver);
782 // try the fallback driver in case of error
783 if (!dbfile && driver != DB_PDB) {
784 driver = DB_PDB;
785 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
786 getDescription().c_str(), driver);
787 }
788 }
789
790 if (!dbfile)
791 throw RipleyException("dump: Could not create Silo file");
792
793 /*
794 if (driver==DB_HDF5) {
795 // gzip level 1 already provides good compression with minimal
796 // performance penalty. Some tests showed that gzip levels >3 performed
797 // rather badly on escript data both in terms of time and space
798 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
799 }
800 */
801
802 boost::scoped_ptr<double> x(new double[m_NN[0]]);
803 boost::scoped_ptr<double> y(new double[m_NN[1]]);
804 boost::scoped_ptr<double> z(new double[m_NN[2]]);
805 double* coords[3] = { x.get(), y.get(), z.get() };
806 #pragma omp parallel
807 {
808 #pragma omp for
809 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
810 coords[0][i0]=getLocalCoordinate(i0, 0);
811 }
812 #pragma omp for
813 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
814 coords[1][i1]=getLocalCoordinate(i1, 1);
815 }
816 #pragma omp for
817 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
818 coords[2][i2]=getLocalCoordinate(i2, 2);
819 }
820 }
821 int* dims = const_cast<int*>(getNumNodesPerDim());
822
823 // write mesh
824 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
825 DB_COLLINEAR, NULL);
826
827 // write node ids
828 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
829 NULL, 0, DB_INT, DB_NODECENT, NULL);
830
831 // write element ids
832 dims = const_cast<int*>(getNumElementsPerDim());
833 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
834 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
835
836 // rank 0 writes multimesh and multivar
837 if (m_mpiInfo->rank == 0) {
838 vector<string> tempstrings;
839 vector<char*> names;
840 for (dim_t i=0; i<m_mpiInfo->size; i++) {
841 stringstream path;
842 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
843 tempstrings.push_back(path.str());
844 names.push_back((char*)tempstrings.back().c_str());
845 }
846 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
847 DBSetDir(dbfile, "/");
848 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
849 &types[0], NULL);
850 tempstrings.clear();
851 names.clear();
852 for (dim_t i=0; i<m_mpiInfo->size; i++) {
853 stringstream path;
854 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
855 tempstrings.push_back(path.str());
856 names.push_back((char*)tempstrings.back().c_str());
857 }
858 types.assign(m_mpiInfo->size, DB_QUADVAR);
859 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
860 &types[0], NULL);
861 tempstrings.clear();
862 names.clear();
863 for (dim_t i=0; i<m_mpiInfo->size; i++) {
864 stringstream path;
865 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
866 tempstrings.push_back(path.str());
867 names.push_back((char*)tempstrings.back().c_str());
868 }
869 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
870 &types[0], NULL);
871 }
872
873 if (m_mpiInfo->size > 1) {
874 #ifdef ESYS_MPI
875 PMPIO_HandOffBaton(baton, dbfile);
876 PMPIO_Finish(baton);
877 #endif
878 } else {
879 DBClose(dbfile);
880 }
881
882 #else // USE_SILO
883 throw RipleyException("dump: no Silo support");
884 #endif
885 }
886
887 const int* Brick::borrowSampleReferenceIDs(int fsType) const
888 {
889 switch (fsType) {
890 case Nodes:
891 case ReducedNodes: //FIXME: reduced
892 return &m_nodeId[0];
893 case DegreesOfFreedom:
894 case ReducedDegreesOfFreedom: //FIXME: reduced
895 return &m_dofId[0];
896 case Elements:
897 case ReducedElements:
898 return &m_elementId[0];
899 case FaceElements:
900 case ReducedFaceElements:
901 return &m_faceId[0];
902 case Points:
903 return &m_diracPointNodeIDs[0];
904 default:
905 break;
906 }
907
908 stringstream msg;
909 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
910 throw RipleyException(msg.str());
911 }
912
913 bool Brick::ownSample(int fsType, index_t id) const
914 {
915 if (getMPISize()==1)
916 return true;
917
918 switch (fsType) {
919 case Nodes:
920 case ReducedNodes: //FIXME: reduced
921 return (m_dofMap[id] < getNumDOF());
922 case DegreesOfFreedom:
923 case ReducedDegreesOfFreedom:
924 return true;
925 case Elements:
926 case ReducedElements:
927 {
928 // check ownership of element's _last_ node
929 const index_t x=id%m_NE[0] + 1;
930 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
931 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
932 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
933 }
934 case FaceElements:
935 case ReducedFaceElements:
936 {
937 // check ownership of face element's last node
938 dim_t n=0;
939 for (size_t i=0; i<6; i++) {
940 n+=m_faceCount[i];
941 if (id<n) {
942 const index_t j=id-n+m_faceCount[i];
943 if (i>=4) { // front or back
944 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
945 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
946 } else if (i>=2) { // bottom or top
947 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
948 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
949 } else { // left or right
950 const index_t first=(i==0 ? 0 : m_NN[0]-1);
951 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
952 }
953 }
954 }
955 return false;
956 }
957 default:
958 break;
959 }
960
961 stringstream msg;
962 msg << "ownSample: invalid function space type " << fsType;
963 throw RipleyException(msg.str());
964 }
965
966 void Brick::setToNormal(escript::Data& out) const
967 {
968 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
969 out.requireWrite();
970 #pragma omp parallel
971 {
972 if (m_faceOffset[0] > -1) {
973 #pragma omp for nowait
974 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
976 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
977 // set vector at four quadrature points
978 *o++ = -1.; *o++ = 0.; *o++ = 0.;
979 *o++ = -1.; *o++ = 0.; *o++ = 0.;
980 *o++ = -1.; *o++ = 0.; *o++ = 0.;
981 *o++ = -1.; *o++ = 0.; *o = 0.;
982 }
983 }
984 }
985
986 if (m_faceOffset[1] > -1) {
987 #pragma omp for nowait
988 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
989 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
990 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
991 // set vector at four quadrature points
992 *o++ = 1.; *o++ = 0.; *o++ = 0.;
993 *o++ = 1.; *o++ = 0.; *o++ = 0.;
994 *o++ = 1.; *o++ = 0.; *o++ = 0.;
995 *o++ = 1.; *o++ = 0.; *o = 0.;
996 }
997 }
998 }
999
1000 if (m_faceOffset[2] > -1) {
1001 #pragma omp for nowait
1002 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1003 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1004 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1005 // set vector at four quadrature points
1006 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1007 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009 *o++ = 0.; *o++ = -1.; *o = 0.;
1010 }
1011 }
1012 }
1013
1014 if (m_faceOffset[3] > -1) {
1015 #pragma omp for nowait
1016 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1017 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1018 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1019 // set vector at four quadrature points
1020 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1021 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023 *o++ = 0.; *o++ = 1.; *o = 0.;
1024 }
1025 }
1026 }
1027
1028 if (m_faceOffset[4] > -1) {
1029 #pragma omp for nowait
1030 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1031 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1032 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1033 // set vector at four quadrature points
1034 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1035 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037 *o++ = 0.; *o++ = 0.; *o = -1.;
1038 }
1039 }
1040 }
1041
1042 if (m_faceOffset[5] > -1) {
1043 #pragma omp for nowait
1044 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1045 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1046 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1047 // set vector at four quadrature points
1048 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1049 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051 *o++ = 0.; *o++ = 0.; *o = 1.;
1052 }
1053 }
1054 }
1055 } // end of parallel section
1056 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1057 out.requireWrite();
1058 #pragma omp parallel
1059 {
1060 if (m_faceOffset[0] > -1) {
1061 #pragma omp for nowait
1062 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1063 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1064 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1065 *o++ = -1.;
1066 *o++ = 0.;
1067 *o = 0.;
1068 }
1069 }
1070 }
1071
1072 if (m_faceOffset[1] > -1) {
1073 #pragma omp for nowait
1074 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1075 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1076 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1077 *o++ = 1.;
1078 *o++ = 0.;
1079 *o = 0.;
1080 }
1081 }
1082 }
1083
1084 if (m_faceOffset[2] > -1) {
1085 #pragma omp for nowait
1086 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1087 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1088 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1089 *o++ = 0.;
1090 *o++ = -1.;
1091 *o = 0.;
1092 }
1093 }
1094 }
1095
1096 if (m_faceOffset[3] > -1) {
1097 #pragma omp for nowait
1098 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1099 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1100 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1101 *o++ = 0.;
1102 *o++ = 1.;
1103 *o = 0.;
1104 }
1105 }
1106 }
1107
1108 if (m_faceOffset[4] > -1) {
1109 #pragma omp for nowait
1110 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1111 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1112 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1113 *o++ = 0.;
1114 *o++ = 0.;
1115 *o = -1.;
1116 }
1117 }
1118 }
1119
1120 if (m_faceOffset[5] > -1) {
1121 #pragma omp for nowait
1122 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1123 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1124 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1125 *o++ = 0.;
1126 *o++ = 0.;
1127 *o = 1.;
1128 }
1129 }
1130 }
1131 } // end of parallel section
1132
1133 } else {
1134 stringstream msg;
1135 msg << "setToNormal: invalid function space type "
1136 << out.getFunctionSpace().getTypeCode();
1137 throw RipleyException(msg.str());
1138 }
1139 }
1140
1141 void Brick::setToSize(escript::Data& out) const
1142 {
1143 if (out.getFunctionSpace().getTypeCode() == Elements
1144 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1145 out.requireWrite();
1146 const dim_t numQuad=out.getNumDataPointsPerSample();
1147 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1148 #pragma omp parallel for
1149 for (index_t k = 0; k < getNumElements(); ++k) {
1150 double* o = out.getSampleDataRW(k);
1151 fill(o, o+numQuad, size);
1152 }
1153 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1154 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1155 out.requireWrite();
1156 const dim_t numQuad=out.getNumDataPointsPerSample();
1157 #pragma omp parallel
1158 {
1159 if (m_faceOffset[0] > -1) {
1160 const double size=min(m_dx[1],m_dx[2]);
1161 #pragma omp for nowait
1162 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1163 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1164 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1165 fill(o, o+numQuad, size);
1166 }
1167 }
1168 }
1169
1170 if (m_faceOffset[1] > -1) {
1171 const double size=min(m_dx[1],m_dx[2]);
1172 #pragma omp for nowait
1173 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1174 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1175 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1176 fill(o, o+numQuad, size);
1177 }
1178 }
1179 }
1180
1181 if (m_faceOffset[2] > -1) {
1182 const double size=min(m_dx[0],m_dx[2]);
1183 #pragma omp for nowait
1184 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1185 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1186 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1187 fill(o, o+numQuad, size);
1188 }
1189 }
1190 }
1191
1192 if (m_faceOffset[3] > -1) {
1193 const double size=min(m_dx[0],m_dx[2]);
1194 #pragma omp for nowait
1195 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1196 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1197 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1198 fill(o, o+numQuad, size);
1199 }
1200 }
1201 }
1202
1203 if (m_faceOffset[4] > -1) {
1204 const double size=min(m_dx[0],m_dx[1]);
1205 #pragma omp for nowait
1206 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1207 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1208 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1209 fill(o, o+numQuad, size);
1210 }
1211 }
1212 }
1213
1214 if (m_faceOffset[5] > -1) {
1215 const double size=min(m_dx[0],m_dx[1]);
1216 #pragma omp for nowait
1217 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1218 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1219 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1220 fill(o, o+numQuad, size);
1221 }
1222 }
1223 }
1224 } // end of parallel section
1225
1226 } else {
1227 stringstream msg;
1228 msg << "setToSize: invalid function space type "
1229 << out.getFunctionSpace().getTypeCode();
1230 throw RipleyException(msg.str());
1231 }
1232 }
1233
1234 void Brick::Print_Mesh_Info(const bool full) const
1235 {
1236 RipleyDomain::Print_Mesh_Info(full);
1237 if (full) {
1238 cout << " Id Coordinates" << endl;
1239 cout.precision(15);
1240 cout.setf(ios::scientific, ios::floatfield);
1241 for (index_t i=0; i < getNumNodes(); i++) {
1242 cout << " " << setw(5) << m_nodeId[i]
1243 << " " << getLocalCoordinate(i%m_NN[0], 0)
1244 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1245 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1246 }
1247 }
1248 }
1249
1250
1251 //protected
1252 void Brick::assembleCoordinates(escript::Data& arg) const
1253 {
1254 escriptDataC x = arg.getDataC();
1255 int numDim = m_numDim;
1256 if (!isDataPointShapeEqual(&x, 1, &numDim))
1257 throw RipleyException("setToX: Invalid Data object shape");
1258 if (!numSamplesEqual(&x, 1, getNumNodes()))
1259 throw RipleyException("setToX: Illegal number of samples in Data object");
1260
1261 arg.requireWrite();
1262 #pragma omp parallel for
1263 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1264 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1265 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1266 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1267 point[0] = getLocalCoordinate(i0, 0);
1268 point[1] = getLocalCoordinate(i1, 1);
1269 point[2] = getLocalCoordinate(i2, 2);
1270 }
1271 }
1272 }
1273 }
1274
1275 //protected
1276 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1277 {
1278 const dim_t numComp = in.getDataPointSize();
1279 const double C0 = .044658198738520451079;
1280 const double C1 = .16666666666666666667;
1281 const double C2 = .21132486540518711775;
1282 const double C3 = .25;
1283 const double C4 = .5;
1284 const double C5 = .62200846792814621559;
1285 const double C6 = .78867513459481288225;
1286
1287 if (out.getFunctionSpace().getTypeCode() == Elements) {
1288 out.requireWrite();
1289 #pragma omp parallel
1290 {
1291 vector<double> f_000(numComp);
1292 vector<double> f_001(numComp);
1293 vector<double> f_010(numComp);
1294 vector<double> f_011(numComp);
1295 vector<double> f_100(numComp);
1296 vector<double> f_101(numComp);
1297 vector<double> f_110(numComp);
1298 vector<double> f_111(numComp);
1299 #pragma omp for
1300 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1301 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1302 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1303 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1304 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1305 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1306 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1307 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1308 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1309 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1310 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1311 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1312 for (index_t i=0; i < numComp; ++i) {
1313 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1314 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1315 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1316 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1317 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1318 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1319 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1320 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1321 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1322 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1323 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1324 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1325 o[INDEX3(i,0,0,numComp,3)] = V0;
1326 o[INDEX3(i,1,0,numComp,3)] = V4;
1327 o[INDEX3(i,2,0,numComp,3)] = V8;
1328 o[INDEX3(i,0,1,numComp,3)] = V0;
1329 o[INDEX3(i,1,1,numComp,3)] = V5;
1330 o[INDEX3(i,2,1,numComp,3)] = V9;
1331 o[INDEX3(i,0,2,numComp,3)] = V1;
1332 o[INDEX3(i,1,2,numComp,3)] = V4;
1333 o[INDEX3(i,2,2,numComp,3)] = V10;
1334 o[INDEX3(i,0,3,numComp,3)] = V1;
1335 o[INDEX3(i,1,3,numComp,3)] = V5;
1336 o[INDEX3(i,2,3,numComp,3)] = V11;
1337 o[INDEX3(i,0,4,numComp,3)] = V2;
1338 o[INDEX3(i,1,4,numComp,3)] = V6;
1339 o[INDEX3(i,2,4,numComp,3)] = V8;
1340 o[INDEX3(i,0,5,numComp,3)] = V2;
1341 o[INDEX3(i,1,5,numComp,3)] = V7;
1342 o[INDEX3(i,2,5,numComp,3)] = V9;
1343 o[INDEX3(i,0,6,numComp,3)] = V3;
1344 o[INDEX3(i,1,6,numComp,3)] = V6;
1345 o[INDEX3(i,2,6,numComp,3)] = V10;
1346 o[INDEX3(i,0,7,numComp,3)] = V3;
1347 o[INDEX3(i,1,7,numComp,3)] = V7;
1348 o[INDEX3(i,2,7,numComp,3)] = V11;
1349 } // end of component loop i
1350 } // end of k0 loop
1351 } // end of k1 loop
1352 } // end of k2 loop
1353 } // end of parallel section
1354 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1355 out.requireWrite();
1356 #pragma omp parallel
1357 {
1358 vector<double> f_000(numComp);
1359 vector<double> f_001(numComp);
1360 vector<double> f_010(numComp);
1361 vector<double> f_011(numComp);
1362 vector<double> f_100(numComp);
1363 vector<double> f_101(numComp);
1364 vector<double> f_110(numComp);
1365 vector<double> f_111(numComp);
1366 #pragma omp for
1367 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1368 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1369 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1370 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1378 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1379 for (index_t i=0; i < numComp; ++i) {
1380 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1381 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1382 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1383 } // end of component loop i
1384 } // end of k0 loop
1385 } // end of k1 loop
1386 } // end of k2 loop
1387 } // end of parallel section
1388 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1389 out.requireWrite();
1390 #pragma omp parallel
1391 {
1392 vector<double> f_000(numComp);
1393 vector<double> f_001(numComp);
1394 vector<double> f_010(numComp);
1395 vector<double> f_011(numComp);
1396 vector<double> f_100(numComp);
1397 vector<double> f_101(numComp);
1398 vector<double> f_110(numComp);
1399 vector<double> f_111(numComp);
1400 if (m_faceOffset[0] > -1) {
1401 #pragma omp for nowait
1402 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1403 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1404 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1405 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1406 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1407 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1408 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1409 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1410 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1411 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1412 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1413 for (index_t i=0; i < numComp; ++i) {
1414 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1415 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1416 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1417 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1418 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1419 o[INDEX3(i,1,0,numComp,3)] = V0;
1420 o[INDEX3(i,2,0,numComp,3)] = V2;
1421 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1422 o[INDEX3(i,1,1,numComp,3)] = V0;
1423 o[INDEX3(i,2,1,numComp,3)] = V3;
1424 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1425 o[INDEX3(i,1,2,numComp,3)] = V1;
1426 o[INDEX3(i,2,2,numComp,3)] = V2;
1427 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1428 o[INDEX3(i,1,3,numComp,3)] = V1;
1429 o[INDEX3(i,2,3,numComp,3)] = V3;
1430 } // end of component loop i
1431 } // end of k1 loop
1432 } // end of k2 loop
1433 } // end of face 0
1434 if (m_faceOffset[1] > -1) {
1435 #pragma omp for nowait
1436 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1437 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1438 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1445 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1446 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1447 for (index_t i=0; i < numComp; ++i) {
1448 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1449 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1450 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1451 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1452 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1453 o[INDEX3(i,1,0,numComp,3)] = V0;
1454 o[INDEX3(i,2,0,numComp,3)] = V2;
1455 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1456 o[INDEX3(i,1,1,numComp,3)] = V0;
1457 o[INDEX3(i,2,1,numComp,3)] = V3;
1458 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1459 o[INDEX3(i,1,2,numComp,3)] = V1;
1460 o[INDEX3(i,2,2,numComp,3)] = V2;
1461 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1462 o[INDEX3(i,1,3,numComp,3)] = V1;
1463 o[INDEX3(i,2,3,numComp,3)] = V3;
1464 } // end of component loop i
1465 } // end of k1 loop
1466 } // end of k2 loop
1467 } // end of face 1
1468 if (m_faceOffset[2] > -1) {
1469 #pragma omp for nowait
1470 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1471 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1472 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1479 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1480 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1481 for (index_t i=0; i < numComp; ++i) {
1482 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1483 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1484 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1485 o[INDEX3(i,0,0,numComp,3)] = V0;
1486 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1487 o[INDEX3(i,2,0,numComp,3)] = V1;
1488 o[INDEX3(i,0,1,numComp,3)] = V0;
1489 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1490 o[INDEX3(i,2,1,numComp,3)] = V2;
1491 o[INDEX3(i,0,2,numComp,3)] = V0;
1492 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1493 o[INDEX3(i,2,2,numComp,3)] = V1;
1494 o[INDEX3(i,0,3,numComp,3)] = V0;
1495 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1496 o[INDEX3(i,2,3,numComp,3)] = V2;
1497 } // end of component loop i
1498 } // end of k0 loop
1499 } // end of k2 loop
1500 } // end of face 2
1501 if (m_faceOffset[3] > -1) {
1502 #pragma omp for nowait
1503 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1504 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1505 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1514 for (index_t i=0; i < numComp; ++i) {
1515 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1516 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1517 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1518 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1519 o[INDEX3(i,0,0,numComp,3)] = V0;
1520 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1521 o[INDEX3(i,2,0,numComp,3)] = V2;
1522 o[INDEX3(i,0,1,numComp,3)] = V0;
1523 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1524 o[INDEX3(i,2,1,numComp,3)] = V3;
1525 o[INDEX3(i,0,2,numComp,3)] = V1;
1526 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1527 o[INDEX3(i,2,2,numComp,3)] = V2;
1528 o[INDEX3(i,0,3,numComp,3)] = V1;
1529 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1530 o[INDEX3(i,2,3,numComp,3)] = V3;
1531 } // end of component loop i
1532 } // end of k0 loop
1533 } // end of k2 loop
1534 } // end of face 3
1535 if (m_faceOffset[4] > -1) {
1536 #pragma omp for nowait
1537 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1538 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1539 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1548 for (index_t i=0; i < numComp; ++i) {
1549 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1550 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1551 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1552 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1553 o[INDEX3(i,0,0,numComp,3)] = V0;
1554 o[INDEX3(i,1,0,numComp,3)] = V2;
1555 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1556 o[INDEX3(i,0,1,numComp,3)] = V0;
1557 o[INDEX3(i,1,1,numComp,3)] = V3;
1558 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1559 o[INDEX3(i,0,2,numComp,3)] = V1;
1560 o[INDEX3(i,1,2,numComp,3)] = V2;
1561 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1562 o[INDEX3(i,0,3,numComp,3)] = V1;
1563 o[INDEX3(i,1,3,numComp,3)] = V3;
1564 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1565 } // end of component loop i
1566 } // end of k0 loop
1567 } // end of k1 loop
1568 } // end of face 4
1569 if (m_faceOffset[5] > -1) {
1570 #pragma omp for nowait
1571 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1572 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1573 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1582 for (index_t i=0; i < numComp; ++i) {
1583 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1584 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1585 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1586 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1587 o[INDEX3(i,0,0,numComp,3)] = V0;
1588 o[INDEX3(i,1,0,numComp,3)] = V2;
1589 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1590 o[INDEX3(i,0,1,numComp,3)] = V0;
1591 o[INDEX3(i,1,1,numComp,3)] = V3;
1592 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1593 o[INDEX3(i,0,2,numComp,3)] = V1;
1594 o[INDEX3(i,1,2,numComp,3)] = V2;
1595 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1596 o[INDEX3(i,0,3,numComp,3)] = V1;
1597 o[INDEX3(i,1,3,numComp,3)] = V3;
1598 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1599 } // end of component loop i
1600 } // end of k0 loop
1601 } // end of k1 loop
1602 } // end of face 5
1603 } // end of parallel section
1604 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1605 out.requireWrite();
1606 #pragma omp parallel
1607 {
1608 vector<double> f_000(numComp);
1609 vector<double> f_001(numComp);
1610 vector<double> f_010(numComp);
1611 vector<double> f_011(numComp);
1612 vector<double> f_100(numComp);
1613 vector<double> f_101(numComp);
1614 vector<double> f_110(numComp);
1615 vector<double> f_111(numComp);
1616 if (m_faceOffset[0] > -1) {
1617 #pragma omp for nowait
1618 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1619 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1620 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1621 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1622 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1623 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1624 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1625 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1626 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1627 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1628 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1629 for (index_t i=0; i < numComp; ++i) {
1630 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1631 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1632 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1633 } // end of component loop i
1634 } // end of k1 loop
1635 } // end of k2 loop
1636 } // end of face 0
1637 if (m_faceOffset[1] > -1) {
1638 #pragma omp for nowait
1639 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1640 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1641 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1642 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1643 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1644 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1645 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1646 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1647 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1648 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1649 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1650 for (index_t i=0; i < numComp; ++i) {
1651 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1652 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1653 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1654 } // end of component loop i
1655 } // end of k1 loop
1656 } // end of k2 loop
1657 } // end of face 1
1658 if (m_faceOffset[2] > -1) {
1659 #pragma omp for nowait
1660 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1661 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1662 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1663 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1664 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1665 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1666 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1667 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1668 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1669 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1670 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1671 for (index_t i=0; i < numComp; ++i) {
1672 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1673 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1674 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1675 } // end of component loop i
1676 } // end of k0 loop
1677 } // end of k2 loop
1678 } // end of face 2
1679 if (m_faceOffset[3] > -1) {
1680 #pragma omp for nowait
1681 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1682 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1683 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1684 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1685 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1686 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1687 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1688 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1689 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1690 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1691 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1692 for (index_t i=0; i < numComp; ++i) {
1693 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1694 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1695 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1696 } // end of component loop i
1697 } // end of k0 loop
1698 } // end of k2 loop
1699 } // end of face 3
1700 if (m_faceOffset[4] > -1) {
1701 #pragma omp for nowait
1702 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1703 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1704 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1705 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1706 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1707 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1708 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1709 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1710 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1711 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1712 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1713 for (index_t i=0; i < numComp; ++i) {
1714 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1715 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1716 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1717 } // end of component loop i
1718 } // end of k0 loop
1719 } // end of k1 loop
1720 } // end of face 4
1721 if (m_faceOffset[5] > -1) {
1722 #pragma omp for nowait
1723 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1724 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1725 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1726 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1727 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1728 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1729 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1730 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1731 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1732 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1733 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1734 for (index_t i=0; i < numComp; ++i) {
1735 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1736 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1737 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1738 } // end of component loop i
1739 } // end of k0 loop
1740 } // end of k1 loop
1741 } // end of face 5
1742 } // end of parallel section
1743 }
1744 }
1745
1746 //protected
1747 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1748 {
1749 const dim_t numComp = arg.getDataPointSize();
1750 const index_t left = (m_offset[0]==0 ? 0 : 1);
1751 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1752 const index_t front = (m_offset[2]==0 ? 0 : 1);
1753 const int fs = arg.getFunctionSpace().getTypeCode();
1754 if (fs == Elements && arg.actsExpanded()) {
1755 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1756 #pragma omp parallel
1757 {
1758 vector<double> int_local(numComp, 0);
1759 #pragma omp for nowait
1760 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1761 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1762 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1763 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1764 for (index_t i=0; i < numComp; ++i) {
1765 const double f_0 = f[INDEX2(i,0,numComp)];
1766 const double f_1 = f[INDEX2(i,1,numComp)];
1767 const double f_2 = f[INDEX2(i,2,numComp)];
1768 const double f_3 = f[INDEX2(i,3,numComp)];
1769 const double f_4 = f[INDEX2(i,4,numComp)];
1770 const double f_5 = f[INDEX2(i,5,numComp)];
1771 const double f_6 = f[INDEX2(i,6,numComp)];
1772 const double f_7 = f[INDEX2(i,7,numComp)];
1773 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1774 } // end of component loop i
1775 } // end of k0 loop
1776 } // end of k1 loop
1777 } // end of k2 loop
1778
1779 #pragma omp critical
1780 for (index_t i=0; i<numComp; i++)
1781 integrals[i]+=int_local[i];
1782 } // end of parallel section
1783
1784 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1785 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1786 #pragma omp parallel
1787 {
1788 vector<double> int_local(numComp, 0);
1789 #pragma omp for nowait
1790 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1791 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1792 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1793 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1794 for (index_t i=0; i < numComp; ++i) {
1795 int_local[i]+=f[i]*w_0;
1796 } // end of component loop i
1797 } // end of k0 loop
1798 } // end of k1 loop
1799 } // end of k2 loop
1800
1801 #pragma omp critical
1802 for (index_t i=0; i<numComp; i++)
1803 integrals[i]+=int_local[i];
1804 } // end of parallel section
1805
1806 } else if (fs == FaceElements && arg.actsExpanded()) {
1807 const double w_0 = m_dx[1]*m_dx[2]/4.;
1808 const double w_1 = m_dx[0]*m_dx[2]/4.;
1809 const double w_2 = m_dx[0]*m_dx[1]/4.;
1810 #pragma omp parallel
1811 {
1812 vector<double> int_local(numComp, 0);
1813 if (m_faceOffset[0] > -1) {
1814 #pragma omp for nowait
1815 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1816 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1817 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1818 for (index_t i=0; i < n