/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4626 - (show annotations)
Wed Jan 22 06:07:34 2014 UTC (5 years, 2 months ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 146643 byte(s)
Eliminated all const_cast<Data*> hacks in ripley and finley now that
Data.getSampleDataRO returns a const pointer.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Brick.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler3D.h>
20 #include <boost/scoped_array.hpp>
21
22 #ifdef USE_NETCDF
23 #include <netcdfcpp.h>
24 #endif
25
26 #if USE_SILO
27 #include <silo.h>
28 #ifdef ESYS_MPI
29 #include <pmpio.h>
30 #endif
31 #endif
32
33 #include <iomanip>
34
35 using namespace std;
36 using esysUtils::FileWriter;
37
38 namespace ripley {
39
40 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
41 double x1, double y1, double z1, int d0, int d1, int d2,
42 const std::vector<double>& points, const std::vector<int>& tags,
43 const std::map<std::string, int>& tagnamestonums) :
44 RipleyDomain(3)
45 {
46 // ignore subdivision parameters for serial run
47 if (m_mpiInfo->size == 1) {
48 d0=1;
49 d1=1;
50 d2=1;
51 }
52 bool warn=false;
53 // if number of subdivisions is non-positive, try to subdivide by the same
54 // ratio as the number of elements
55 if (d0<=0 && d1<=0 && d2<=0) {
56 warn=true;
57 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
58 d0=max(1, d0);
59 d1=max(1, (int)(d0*n1/(float)n0));
60 d2=m_mpiInfo->size/(d0*d1);
61 if (d0*d1*d2 != m_mpiInfo->size) {
62 // ratios not the same so leave "smallest" side undivided and try
63 // dividing 2 sides only
64 if (n0>=n1) {
65 if (n1>=n2) {
66 d0=d1=0;
67 d2=1;
68 } else {
69 d0=d2=0;
70 d1=1;
71 }
72 } else {
73 if (n0>=n2) {
74 d0=d1=0;
75 d2=1;
76 } else {
77 d0=1;
78 d1=d2=0;
79 }
80 }
81 }
82 }
83 if (d0<=0 && d1<=0) {
84 warn=true;
85 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
86 d1=m_mpiInfo->size/d0;
87 if (d0*d1*d2 != m_mpiInfo->size) {
88 // ratios not the same so subdivide side with more elements only
89 if (n0>n1) {
90 d0=0;
91 d1=1;
92 } else {
93 d0=1;
94 d1=0;
95 }
96 }
97 } else if (d0<=0 && d2<=0) {
98 warn=true;
99 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
100 d2=m_mpiInfo->size/d0;
101 if (d0*d1*d2 != m_mpiInfo->size) {
102 // ratios not the same so subdivide side with more elements only
103 if (n0>n2) {
104 d0=0;
105 d2=1;
106 } else {
107 d0=1;
108 d2=0;
109 }
110 }
111 } else if (d1<=0 && d2<=0) {
112 warn=true;
113 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
114 d2=m_mpiInfo->size/d1;
115 if (d0*d1*d2 != m_mpiInfo->size) {
116 // ratios not the same so subdivide side with more elements only
117 if (n1>n2) {
118 d1=0;
119 d2=1;
120 } else {
121 d1=1;
122 d2=0;
123 }
124 }
125 }
126 if (d0<=0) {
127 // d1,d2 are preset, determine d0
128 d0=m_mpiInfo->size/(d1*d2);
129 } else if (d1<=0) {
130 // d0,d2 are preset, determine d1
131 d1=m_mpiInfo->size/(d0*d2);
132 } else if (d2<=0) {
133 // d0,d1 are preset, determine d2
134 d2=m_mpiInfo->size/(d0*d1);
135 }
136
137 // ensure number of subdivisions is valid and nodes can be distributed
138 // among number of ranks
139 if (d0*d1*d2 != m_mpiInfo->size)
140 throw RipleyException("Invalid number of spatial subdivisions");
141
142 if (warn) {
143 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
144 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
145 }
146
147 double l0 = x1-x0;
148 double l1 = y1-y0;
149 double l2 = z1-z0;
150 m_dx[0] = l0/n0;
151 m_dx[1] = l1/n1;
152 m_dx[2] = l2/n2;
153
154 if ((n0+1)%d0 > 0) {
155 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
156 l0=m_dx[0]*n0;
157 cout << "Warning: Adjusted number of elements and length. N0="
158 << n0 << ", l0=" << l0 << endl;
159 }
160 if ((n1+1)%d1 > 0) {
161 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
162 l1=m_dx[1]*n1;
163 cout << "Warning: Adjusted number of elements and length. N1="
164 << n1 << ", l1=" << l1 << endl;
165 }
166 if ((n2+1)%d2 > 0) {
167 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
168 l2=m_dx[2]*n2;
169 cout << "Warning: Adjusted number of elements and length. N2="
170 << n2 << ", l2=" << l2 << endl;
171 }
172
173 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
174 throw RipleyException("Too few elements for the number of ranks");
175
176 m_gNE[0] = n0;
177 m_gNE[1] = n1;
178 m_gNE[2] = n2;
179 m_origin[0] = x0;
180 m_origin[1] = y0;
181 m_origin[2] = z0;
182 m_length[0] = l0;
183 m_length[1] = l1;
184 m_length[2] = l2;
185 m_NX[0] = d0;
186 m_NX[1] = d1;
187 m_NX[2] = d2;
188
189 // local number of elements (including overlap)
190 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
191 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
192 m_NE[0]++;
193 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
194 m_ownNE[0]--;
195
196 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
197 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
198 m_NE[1]++;
199 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
200 m_ownNE[1]--;
201
202 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
203 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
204 m_NE[2]++;
205 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
206 m_ownNE[2]--;
207
208 // local number of nodes
209 m_NN[0] = m_NE[0]+1;
210 m_NN[1] = m_NE[1]+1;
211 m_NN[2] = m_NE[2]+1;
212
213 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
214 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
215 if (m_offset[0] > 0)
216 m_offset[0]--;
217 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
218 if (m_offset[1] > 0)
219 m_offset[1]--;
220 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
221 if (m_offset[2] > 0)
222 m_offset[2]--;
223
224 populateSampleIds();
225 createPattern();
226
227 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
228 addPoints(tags.size(), &points[0], &tags[0]);
229 }
230
231
232 Brick::~Brick()
233 {
234 Paso_SystemMatrixPattern_free(m_pattern);
235 Paso_Connector_free(m_connector);
236 delete assembler;
237 }
238
239 string Brick::getDescription() const
240 {
241 return "ripley::Brick";
242 }
243
244 bool Brick::operator==(const AbstractDomain& other) const
245 {
246 const Brick* o=dynamic_cast<const Brick*>(&other);
247 if (o) {
248 return (RipleyDomain::operator==(other) &&
249 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
250 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
251 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
252 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
253 }
254
255 return false;
256 }
257
258 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
259 const ReaderParameters& params) const
260 {
261 #ifdef USE_NETCDF
262 // check destination function space
263 int myN0, myN1, myN2;
264 if (out.getFunctionSpace().getTypeCode() == Nodes) {
265 myN0 = m_NN[0];
266 myN1 = m_NN[1];
267 myN2 = m_NN[2];
268 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
269 out.getFunctionSpace().getTypeCode() == ReducedElements) {
270 myN0 = m_NE[0];
271 myN1 = m_NE[1];
272 myN2 = m_NE[2];
273 } else
274 throw RipleyException("readNcGrid(): invalid function space for output data object");
275
276 if (params.first.size() != 3)
277 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
278
279 if (params.numValues.size() != 3)
280 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
281
282 if (params.multiplier.size() != 3)
283 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
284 for (size_t i=0; i<params.multiplier.size(); i++)
285 if (params.multiplier[i]<1)
286 throw RipleyException("readNcGrid(): all multipliers must be positive");
287
288 // check file existence and size
289 NcFile f(filename.c_str(), NcFile::ReadOnly);
290 if (!f.is_valid())
291 throw RipleyException("readNcGrid(): cannot open file");
292
293 NcVar* var = f.get_var(varname.c_str());
294 if (!var)
295 throw RipleyException("readNcGrid(): invalid variable name");
296
297 // TODO: rank>0 data support
298 const int numComp = out.getDataPointSize();
299 if (numComp > 1)
300 throw RipleyException("readNcGrid(): only scalar data supported");
301
302 const int dims = var->num_dims();
303 boost::scoped_array<long> edges(var->edges());
304
305 // is this a slice of the data object (dims!=3)?
306 // note the expected ordering of edges (as in numpy: z,y,x)
307 if ( (dims==3 && (params.numValues[2] > edges[0] ||
308 params.numValues[1] > edges[1] ||
309 params.numValues[0] > edges[2]))
310 || (dims==2 && params.numValues[2]>1)
311 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
312 throw RipleyException("readNcGrid(): not enough data in file");
313 }
314
315 // check if this rank contributes anything
316 if (params.first[0] >= m_offset[0]+myN0 ||
317 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
318 params.first[1] >= m_offset[1]+myN1 ||
319 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
320 params.first[2] >= m_offset[2]+myN2 ||
321 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
322 return;
323 }
324
325 // now determine how much this rank has to write
326
327 // first coordinates in data object to write to
328 const int first0 = max(0, params.first[0]-m_offset[0]);
329 const int first1 = max(0, params.first[1]-m_offset[1]);
330 const int first2 = max(0, params.first[2]-m_offset[2]);
331 // indices to first value in file (not accounting for reverse yet)
332 int idx0 = max(0, m_offset[0]-params.first[0]);
333 int idx1 = max(0, m_offset[1]-params.first[1]);
334 int idx2 = max(0, m_offset[2]-params.first[2]);
335 // number of values to read
336 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
337 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
338 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
339
340 // make sure we read the right block if going backwards through file
341 if (params.reverse[0])
342 idx0 = edges[dims-1]-num0-idx0;
343 if (dims>1 && params.reverse[1])
344 idx1 = edges[dims-2]-num1-idx1;
345 if (dims>2 && params.reverse[2])
346 idx2 = edges[dims-3]-num2-idx2;
347
348
349 vector<double> values(num0*num1*num2);
350 if (dims==3) {
351 var->set_cur(idx2, idx1, idx0);
352 var->get(&values[0], num2, num1, num0);
353 } else if (dims==2) {
354 var->set_cur(idx1, idx0);
355 var->get(&values[0], num1, num0);
356 } else {
357 var->set_cur(idx0);
358 var->get(&values[0], num0);
359 }
360
361 const int dpp = out.getNumDataPointsPerSample();
362 out.requireWrite();
363
364 // helpers for reversing
365 const int x0 = (params.reverse[0] ? num0-1 : 0);
366 const int x_mult = (params.reverse[0] ? -1 : 1);
367 const int y0 = (params.reverse[1] ? num1-1 : 0);
368 const int y_mult = (params.reverse[1] ? -1 : 1);
369 const int z0 = (params.reverse[2] ? num2-1 : 0);
370 const int z_mult = (params.reverse[2] ? -1 : 1);
371
372 for (index_t z=0; z<num2; z++) {
373 for (index_t y=0; y<num1; y++) {
374 #pragma omp parallel for
375 for (index_t x=0; x<num0; x++) {
376 const int baseIndex = first0+x*params.multiplier[0]
377 +(first1+y*params.multiplier[1])*myN0
378 +(first2+z*params.multiplier[2])*myN0*myN1;
379 const int srcIndex=(z0+z_mult*z)*num1*num0
380 +(y0+y_mult*y)*num0
381 +(x0+x_mult*x);
382 if (!isnan(values[srcIndex])) {
383 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
384 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
385 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
386 const int dataIndex = baseIndex+m0
387 +m1*myN0
388 +m2*myN0*myN1;
389 double* dest = out.getSampleDataRW(dataIndex);
390 for (index_t q=0; q<dpp; q++) {
391 *dest++ = values[srcIndex];
392 }
393 }
394 }
395 }
396 }
397 }
398 }
399 }
400 #else
401 throw RipleyException("readNcGrid(): not compiled with netCDF support");
402 #endif
403 }
404
405 void Brick::readBinaryGrid(escript::Data& out, string filename,
406 const ReaderParameters& params) const
407 {
408 // the mapping is not universally correct but should work on our
409 // supported platforms
410 switch (params.dataType) {
411 case DATATYPE_INT32:
412 readBinaryGridImpl<int>(out, filename, params);
413 break;
414 case DATATYPE_FLOAT32:
415 readBinaryGridImpl<float>(out, filename, params);
416 break;
417 case DATATYPE_FLOAT64:
418 readBinaryGridImpl<double>(out, filename, params);
419 break;
420 default:
421 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
422 }
423 }
424
425 template<typename ValueType>
426 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
427 const ReaderParameters& params) const
428 {
429 // check destination function space
430 int myN0, myN1, myN2;
431 if (out.getFunctionSpace().getTypeCode() == Nodes) {
432 myN0 = m_NN[0];
433 myN1 = m_NN[1];
434 myN2 = m_NN[2];
435 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
436 out.getFunctionSpace().getTypeCode() == ReducedElements) {
437 myN0 = m_NE[0];
438 myN1 = m_NE[1];
439 myN2 = m_NE[2];
440 } else
441 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
442
443 if (params.first.size() != 3)
444 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
445
446 if (params.numValues.size() != 3)
447 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
448
449 if (params.multiplier.size() != 3)
450 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
451 for (size_t i=0; i<params.multiplier.size(); i++)
452 if (params.multiplier[i]<1)
453 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
454
455 // check file existence and size
456 ifstream f(filename.c_str(), ifstream::binary);
457 if (f.fail()) {
458 throw RipleyException("readBinaryGrid(): cannot open file");
459 }
460 f.seekg(0, ios::end);
461 const int numComp = out.getDataPointSize();
462 const int filesize = f.tellg();
463 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
464 if (filesize < reqsize) {
465 f.close();
466 throw RipleyException("readBinaryGrid(): not enough data in file");
467 }
468
469 // check if this rank contributes anything
470 if (params.first[0] >= m_offset[0]+myN0 ||
471 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
472 params.first[1] >= m_offset[1]+myN1 ||
473 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
474 params.first[2] >= m_offset[2]+myN2 ||
475 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
476 f.close();
477 return;
478 }
479
480 // now determine how much this rank has to write
481
482 // first coordinates in data object to write to
483 const int first0 = max(0, params.first[0]-m_offset[0]);
484 const int first1 = max(0, params.first[1]-m_offset[1]);
485 const int first2 = max(0, params.first[2]-m_offset[2]);
486 // indices to first value in file
487 const int idx0 = max(0, m_offset[0]-params.first[0]);
488 const int idx1 = max(0, m_offset[1]-params.first[1]);
489 const int idx2 = max(0, m_offset[2]-params.first[2]);
490 // number of values to read
491 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
492 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
493 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
494
495 out.requireWrite();
496 vector<ValueType> values(num0*numComp);
497 const int dpp = out.getNumDataPointsPerSample();
498
499 for (int z=0; z<num2; z++) {
500 for (int y=0; y<num1; y++) {
501 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
502 +(idx2+z)*params.numValues[0]*params.numValues[1]);
503 f.seekg(fileofs*sizeof(ValueType));
504 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
505
506 for (int x=0; x<num0; x++) {
507 const int baseIndex = first0+x*params.multiplier[0]
508 +(first1+y*params.multiplier[1])*myN0
509 +(first2+z*params.multiplier[2])*myN0*myN1;
510 for (int m2=0; m2<params.multiplier[2]; m2++) {
511 for (int m1=0; m1<params.multiplier[1]; m1++) {
512 for (int m0=0; m0<params.multiplier[0]; m0++) {
513 const int dataIndex = baseIndex+m0
514 +m1*myN0
515 +m2*myN0*myN1;
516 double* dest = out.getSampleDataRW(dataIndex);
517 for (int c=0; c<numComp; c++) {
518 ValueType val = values[x*numComp+c];
519
520 if (params.byteOrder != BYTEORDER_NATIVE) {
521 char* cval = reinterpret_cast<char*>(&val);
522 // this will alter val!!
523 byte_swap32(cval);
524 }
525 if (!std::isnan(val)) {
526 for (int q=0; q<dpp; q++) {
527 *dest++ = static_cast<double>(val);
528 }
529 }
530 }
531 }
532 }
533 }
534 }
535 }
536 }
537
538 f.close();
539 }
540
541 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
542 int byteOrder, int dataType) const
543 {
544 // the mapping is not universally correct but should work on our
545 // supported platforms
546 switch (dataType) {
547 case DATATYPE_INT32:
548 writeBinaryGridImpl<int>(in, filename, byteOrder);
549 break;
550 case DATATYPE_FLOAT32:
551 writeBinaryGridImpl<float>(in, filename, byteOrder);
552 break;
553 case DATATYPE_FLOAT64:
554 writeBinaryGridImpl<double>(in, filename, byteOrder);
555 break;
556 default:
557 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
558 }
559 }
560
561 template<typename ValueType>
562 void Brick::writeBinaryGridImpl(const escript::Data& in,
563 const string& filename, int byteOrder) const
564 {
565 // check function space and determine number of points
566 int myN0, myN1, myN2;
567 int totalN0, totalN1, totalN2;
568 if (in.getFunctionSpace().getTypeCode() == Nodes) {
569 myN0 = m_NN[0];
570 myN1 = m_NN[1];
571 myN2 = m_NN[2];
572 totalN0 = m_gNE[0]+1;
573 totalN1 = m_gNE[1]+1;
574 totalN2 = m_gNE[2]+1;
575 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
576 in.getFunctionSpace().getTypeCode() == ReducedElements) {
577 myN0 = m_NE[0];
578 myN1 = m_NE[1];
579 myN2 = m_NE[2];
580 totalN0 = m_gNE[0];
581 totalN1 = m_gNE[1];
582 totalN2 = m_gNE[2];
583 } else
584 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
585
586 const int numComp = in.getDataPointSize();
587 const int dpp = in.getNumDataPointsPerSample();
588 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
589
590 if (numComp > 1 || dpp > 1)
591 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
592
593 // from here on we know that each sample consists of one value
594 FileWriter fw;
595 fw.openFile(filename, fileSize);
596 MPIBarrier();
597
598 for (index_t z=0; z<myN2; z++) {
599 for (index_t y=0; y<myN1; y++) {
600 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
601 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
602 ostringstream oss;
603
604 for (index_t x=0; x<myN0; x++) {
605 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
606 ValueType fvalue = static_cast<ValueType>(*sample);
607 if (byteOrder == BYTEORDER_NATIVE) {
608 oss.write((char*)&fvalue, sizeof(fvalue));
609 } else {
610 char* value = reinterpret_cast<char*>(&fvalue);
611 oss.write(byte_swap32(value), sizeof(fvalue));
612 }
613 }
614 fw.writeAt(oss, fileofs);
615 }
616 }
617 fw.close();
618 }
619
620 void Brick::dump(const string& fileName) const
621 {
622 #if USE_SILO
623 string fn(fileName);
624 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
625 fn+=".silo";
626 }
627
628 int driver=DB_HDF5;
629 string siloPath;
630 DBfile* dbfile = NULL;
631
632 #ifdef ESYS_MPI
633 PMPIO_baton_t* baton = NULL;
634 const int NUM_SILO_FILES = 1;
635 const char* blockDirFmt = "/block%04d";
636 #endif
637
638 if (m_mpiInfo->size > 1) {
639 #ifdef ESYS_MPI
640 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
641 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
642 PMPIO_DefaultClose, (void*)&driver);
643 // try the fallback driver in case of error
644 if (!baton && driver != DB_PDB) {
645 driver = DB_PDB;
646 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
647 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
648 PMPIO_DefaultClose, (void*)&driver);
649 }
650 if (baton) {
651 char str[64];
652 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
653 siloPath = str;
654 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
655 }
656 #endif
657 } else {
658 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
659 getDescription().c_str(), driver);
660 // try the fallback driver in case of error
661 if (!dbfile && driver != DB_PDB) {
662 driver = DB_PDB;
663 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
664 getDescription().c_str(), driver);
665 }
666 }
667
668 if (!dbfile)
669 throw RipleyException("dump: Could not create Silo file");
670
671 /*
672 if (driver==DB_HDF5) {
673 // gzip level 1 already provides good compression with minimal
674 // performance penalty. Some tests showed that gzip levels >3 performed
675 // rather badly on escript data both in terms of time and space
676 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
677 }
678 */
679
680 boost::scoped_ptr<double> x(new double[m_NN[0]]);
681 boost::scoped_ptr<double> y(new double[m_NN[1]]);
682 boost::scoped_ptr<double> z(new double[m_NN[2]]);
683 double* coords[3] = { x.get(), y.get(), z.get() };
684 #pragma omp parallel
685 {
686 #pragma omp for
687 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
688 coords[0][i0]=getLocalCoordinate(i0, 0);
689 }
690 #pragma omp for
691 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
692 coords[1][i1]=getLocalCoordinate(i1, 1);
693 }
694 #pragma omp for
695 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
696 coords[2][i2]=getLocalCoordinate(i2, 2);
697 }
698 }
699 int* dims = const_cast<int*>(getNumNodesPerDim());
700
701 // write mesh
702 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
703 DB_COLLINEAR, NULL);
704
705 // write node ids
706 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
707 NULL, 0, DB_INT, DB_NODECENT, NULL);
708
709 // write element ids
710 dims = const_cast<int*>(getNumElementsPerDim());
711 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
712 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
713
714 // rank 0 writes multimesh and multivar
715 if (m_mpiInfo->rank == 0) {
716 vector<string> tempstrings;
717 vector<char*> names;
718 for (dim_t i=0; i<m_mpiInfo->size; i++) {
719 stringstream path;
720 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
721 tempstrings.push_back(path.str());
722 names.push_back((char*)tempstrings.back().c_str());
723 }
724 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
725 DBSetDir(dbfile, "/");
726 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
727 &types[0], NULL);
728 tempstrings.clear();
729 names.clear();
730 for (dim_t i=0; i<m_mpiInfo->size; i++) {
731 stringstream path;
732 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
733 tempstrings.push_back(path.str());
734 names.push_back((char*)tempstrings.back().c_str());
735 }
736 types.assign(m_mpiInfo->size, DB_QUADVAR);
737 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
738 &types[0], NULL);
739 tempstrings.clear();
740 names.clear();
741 for (dim_t i=0; i<m_mpiInfo->size; i++) {
742 stringstream path;
743 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
744 tempstrings.push_back(path.str());
745 names.push_back((char*)tempstrings.back().c_str());
746 }
747 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
748 &types[0], NULL);
749 }
750
751 if (m_mpiInfo->size > 1) {
752 #ifdef ESYS_MPI
753 PMPIO_HandOffBaton(baton, dbfile);
754 PMPIO_Finish(baton);
755 #endif
756 } else {
757 DBClose(dbfile);
758 }
759
760 #else // USE_SILO
761 throw RipleyException("dump: no Silo support");
762 #endif
763 }
764
765 const int* Brick::borrowSampleReferenceIDs(int fsType) const
766 {
767 switch (fsType) {
768 case Nodes:
769 case ReducedNodes: //FIXME: reduced
770 return &m_nodeId[0];
771 case DegreesOfFreedom:
772 case ReducedDegreesOfFreedom: //FIXME: reduced
773 return &m_dofId[0];
774 case Elements:
775 case ReducedElements:
776 return &m_elementId[0];
777 case FaceElements:
778 case ReducedFaceElements:
779 return &m_faceId[0];
780 default:
781 break;
782 }
783
784 stringstream msg;
785 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
786 throw RipleyException(msg.str());
787 }
788
789 bool Brick::ownSample(int fsType, index_t id) const
790 {
791 if (getMPISize()==1)
792 return true;
793
794 switch (fsType) {
795 case Nodes:
796 case ReducedNodes: //FIXME: reduced
797 return (m_dofMap[id] < getNumDOF());
798 case DegreesOfFreedom:
799 case ReducedDegreesOfFreedom:
800 return true;
801 case Elements:
802 case ReducedElements:
803 {
804 // check ownership of element's _last_ node
805 const index_t x=id%m_NE[0] + 1;
806 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
807 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
808 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
809 }
810 case FaceElements:
811 case ReducedFaceElements:
812 {
813 // check ownership of face element's last node
814 dim_t n=0;
815 for (size_t i=0; i<6; i++) {
816 n+=m_faceCount[i];
817 if (id<n) {
818 const index_t j=id-n+m_faceCount[i];
819 if (i>=4) { // front or back
820 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
821 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
822 } else if (i>=2) { // bottom or top
823 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
824 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
825 } else { // left or right
826 const index_t first=(i==0 ? 0 : m_NN[0]-1);
827 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
828 }
829 }
830 }
831 return false;
832 }
833 default:
834 break;
835 }
836
837 stringstream msg;
838 msg << "ownSample: invalid function space type " << fsType;
839 throw RipleyException(msg.str());
840 }
841
842 void Brick::setToNormal(escript::Data& out) const
843 {
844 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
845 out.requireWrite();
846 #pragma omp parallel
847 {
848 if (m_faceOffset[0] > -1) {
849 #pragma omp for nowait
850 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
851 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
852 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
853 // set vector at four quadrature points
854 *o++ = -1.; *o++ = 0.; *o++ = 0.;
855 *o++ = -1.; *o++ = 0.; *o++ = 0.;
856 *o++ = -1.; *o++ = 0.; *o++ = 0.;
857 *o++ = -1.; *o++ = 0.; *o = 0.;
858 }
859 }
860 }
861
862 if (m_faceOffset[1] > -1) {
863 #pragma omp for nowait
864 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
865 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
866 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
867 // set vector at four quadrature points
868 *o++ = 1.; *o++ = 0.; *o++ = 0.;
869 *o++ = 1.; *o++ = 0.; *o++ = 0.;
870 *o++ = 1.; *o++ = 0.; *o++ = 0.;
871 *o++ = 1.; *o++ = 0.; *o = 0.;
872 }
873 }
874 }
875
876 if (m_faceOffset[2] > -1) {
877 #pragma omp for nowait
878 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
879 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
880 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
881 // set vector at four quadrature points
882 *o++ = 0.; *o++ = -1.; *o++ = 0.;
883 *o++ = 0.; *o++ = -1.; *o++ = 0.;
884 *o++ = 0.; *o++ = -1.; *o++ = 0.;
885 *o++ = 0.; *o++ = -1.; *o = 0.;
886 }
887 }
888 }
889
890 if (m_faceOffset[3] > -1) {
891 #pragma omp for nowait
892 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
893 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
894 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
895 // set vector at four quadrature points
896 *o++ = 0.; *o++ = 1.; *o++ = 0.;
897 *o++ = 0.; *o++ = 1.; *o++ = 0.;
898 *o++ = 0.; *o++ = 1.; *o++ = 0.;
899 *o++ = 0.; *o++ = 1.; *o = 0.;
900 }
901 }
902 }
903
904 if (m_faceOffset[4] > -1) {
905 #pragma omp for nowait
906 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
907 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
908 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
909 // set vector at four quadrature points
910 *o++ = 0.; *o++ = 0.; *o++ = -1.;
911 *o++ = 0.; *o++ = 0.; *o++ = -1.;
912 *o++ = 0.; *o++ = 0.; *o++ = -1.;
913 *o++ = 0.; *o++ = 0.; *o = -1.;
914 }
915 }
916 }
917
918 if (m_faceOffset[5] > -1) {
919 #pragma omp for nowait
920 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
921 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
922 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
923 // set vector at four quadrature points
924 *o++ = 0.; *o++ = 0.; *o++ = 1.;
925 *o++ = 0.; *o++ = 0.; *o++ = 1.;
926 *o++ = 0.; *o++ = 0.; *o++ = 1.;
927 *o++ = 0.; *o++ = 0.; *o = 1.;
928 }
929 }
930 }
931 } // end of parallel section
932 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
933 out.requireWrite();
934 #pragma omp parallel
935 {
936 if (m_faceOffset[0] > -1) {
937 #pragma omp for nowait
938 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
939 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
940 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
941 *o++ = -1.;
942 *o++ = 0.;
943 *o = 0.;
944 }
945 }
946 }
947
948 if (m_faceOffset[1] > -1) {
949 #pragma omp for nowait
950 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
951 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
952 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
953 *o++ = 1.;
954 *o++ = 0.;
955 *o = 0.;
956 }
957 }
958 }
959
960 if (m_faceOffset[2] > -1) {
961 #pragma omp for nowait
962 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
963 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
964 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
965 *o++ = 0.;
966 *o++ = -1.;
967 *o = 0.;
968 }
969 }
970 }
971
972 if (m_faceOffset[3] > -1) {
973 #pragma omp for nowait
974 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
976 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
977 *o++ = 0.;
978 *o++ = 1.;
979 *o = 0.;
980 }
981 }
982 }
983
984 if (m_faceOffset[4] > -1) {
985 #pragma omp for nowait
986 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
987 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
988 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
989 *o++ = 0.;
990 *o++ = 0.;
991 *o = -1.;
992 }
993 }
994 }
995
996 if (m_faceOffset[5] > -1) {
997 #pragma omp for nowait
998 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
999 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1000 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1001 *o++ = 0.;
1002 *o++ = 0.;
1003 *o = 1.;
1004 }
1005 }
1006 }
1007 } // end of parallel section
1008
1009 } else {
1010 stringstream msg;
1011 msg << "setToNormal: invalid function space type "
1012 << out.getFunctionSpace().getTypeCode();
1013 throw RipleyException(msg.str());
1014 }
1015 }
1016
1017 void Brick::setToSize(escript::Data& out) const
1018 {
1019 if (out.getFunctionSpace().getTypeCode() == Elements
1020 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1021 out.requireWrite();
1022 const dim_t numQuad=out.getNumDataPointsPerSample();
1023 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1024 #pragma omp parallel for
1025 for (index_t k = 0; k < getNumElements(); ++k) {
1026 double* o = out.getSampleDataRW(k);
1027 fill(o, o+numQuad, size);
1028 }
1029 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1030 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1031 out.requireWrite();
1032 const dim_t numQuad=out.getNumDataPointsPerSample();
1033 #pragma omp parallel
1034 {
1035 if (m_faceOffset[0] > -1) {
1036 const double size=min(m_dx[1],m_dx[2]);
1037 #pragma omp for nowait
1038 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1039 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1040 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1041 fill(o, o+numQuad, size);
1042 }
1043 }
1044 }
1045
1046 if (m_faceOffset[1] > -1) {
1047 const double size=min(m_dx[1],m_dx[2]);
1048 #pragma omp for nowait
1049 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1050 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1051 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1052 fill(o, o+numQuad, size);
1053 }
1054 }
1055 }
1056
1057 if (m_faceOffset[2] > -1) {
1058 const double size=min(m_dx[0],m_dx[2]);
1059 #pragma omp for nowait
1060 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1061 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1062 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1063 fill(o, o+numQuad, size);
1064 }
1065 }
1066 }
1067
1068 if (m_faceOffset[3] > -1) {
1069 const double size=min(m_dx[0],m_dx[2]);
1070 #pragma omp for nowait
1071 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1072 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1073 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1074 fill(o, o+numQuad, size);
1075 }
1076 }
1077 }
1078
1079 if (m_faceOffset[4] > -1) {
1080 const double size=min(m_dx[0],m_dx[1]);
1081 #pragma omp for nowait
1082 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1083 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1084 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1085 fill(o, o+numQuad, size);
1086 }
1087 }
1088 }
1089
1090 if (m_faceOffset[5] > -1) {
1091 const double size=min(m_dx[0],m_dx[1]);
1092 #pragma omp for nowait
1093 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1094 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1095 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1096 fill(o, o+numQuad, size);
1097 }
1098 }
1099 }
1100 } // end of parallel section
1101
1102 } else {
1103 stringstream msg;
1104 msg << "setToSize: invalid function space type "
1105 << out.getFunctionSpace().getTypeCode();
1106 throw RipleyException(msg.str());
1107 }
1108 }
1109
1110 void Brick::Print_Mesh_Info(const bool full) const
1111 {
1112 RipleyDomain::Print_Mesh_Info(full);
1113 if (full) {
1114 cout << " Id Coordinates" << endl;
1115 cout.precision(15);
1116 cout.setf(ios::scientific, ios::floatfield);
1117 for (index_t i=0; i < getNumNodes(); i++) {
1118 cout << " " << setw(5) << m_nodeId[i]
1119 << " " << getLocalCoordinate(i%m_NN[0], 0)
1120 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1121 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1122 }
1123 }
1124 }
1125
1126
1127 //protected
1128 void Brick::assembleCoordinates(escript::Data& arg) const
1129 {
1130 escriptDataC x = arg.getDataC();
1131 int numDim = m_numDim;
1132 if (!isDataPointShapeEqual(&x, 1, &numDim))
1133 throw RipleyException("setToX: Invalid Data object shape");
1134 if (!numSamplesEqual(&x, 1, getNumNodes()))
1135 throw RipleyException("setToX: Illegal number of samples in Data object");
1136
1137 arg.requireWrite();
1138 #pragma omp parallel for
1139 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1140 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1141 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1142 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1143 point[0] = getLocalCoordinate(i0, 0);
1144 point[1] = getLocalCoordinate(i1, 1);
1145 point[2] = getLocalCoordinate(i2, 2);
1146 }
1147 }
1148 }
1149 }
1150
1151 //protected
1152 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1153 {
1154 const dim_t numComp = in.getDataPointSize();
1155 const double C0 = .044658198738520451079;
1156 const double C1 = .16666666666666666667;
1157 const double C2 = .21132486540518711775;
1158 const double C3 = .25;
1159 const double C4 = .5;
1160 const double C5 = .62200846792814621559;
1161 const double C6 = .78867513459481288225;
1162
1163 if (out.getFunctionSpace().getTypeCode() == Elements) {
1164 out.requireWrite();
1165 #pragma omp parallel
1166 {
1167 vector<double> f_000(numComp);
1168 vector<double> f_001(numComp);
1169 vector<double> f_010(numComp);
1170 vector<double> f_011(numComp);
1171 vector<double> f_100(numComp);
1172 vector<double> f_101(numComp);
1173 vector<double> f_110(numComp);
1174 vector<double> f_111(numComp);
1175 #pragma omp for
1176 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1177 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1178 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1179 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1180 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1181 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1182 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1183 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1184 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1185 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1186 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1187 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1188 for (index_t i=0; i < numComp; ++i) {
1189 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1190 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1191 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1192 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1193 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1194 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1195 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1196 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1197 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1198 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1199 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1200 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1201 o[INDEX3(i,0,0,numComp,3)] = V0;
1202 o[INDEX3(i,1,0,numComp,3)] = V4;
1203 o[INDEX3(i,2,0,numComp,3)] = V8;
1204 o[INDEX3(i,0,1,numComp,3)] = V0;
1205 o[INDEX3(i,1,1,numComp,3)] = V5;
1206 o[INDEX3(i,2,1,numComp,3)] = V9;
1207 o[INDEX3(i,0,2,numComp,3)] = V1;
1208 o[INDEX3(i,1,2,numComp,3)] = V4;
1209 o[INDEX3(i,2,2,numComp,3)] = V10;
1210 o[INDEX3(i,0,3,numComp,3)] = V1;
1211 o[INDEX3(i,1,3,numComp,3)] = V5;
1212 o[INDEX3(i,2,3,numComp,3)] = V11;
1213 o[INDEX3(i,0,4,numComp,3)] = V2;
1214 o[INDEX3(i,1,4,numComp,3)] = V6;
1215 o[INDEX3(i,2,4,numComp,3)] = V8;
1216 o[INDEX3(i,0,5,numComp,3)] = V2;
1217 o[INDEX3(i,1,5,numComp,3)] = V7;
1218 o[INDEX3(i,2,5,numComp,3)] = V9;
1219 o[INDEX3(i,0,6,numComp,3)] = V3;
1220 o[INDEX3(i,1,6,numComp,3)] = V6;
1221 o[INDEX3(i,2,6,numComp,3)] = V10;
1222 o[INDEX3(i,0,7,numComp,3)] = V3;
1223 o[INDEX3(i,1,7,numComp,3)] = V7;
1224 o[INDEX3(i,2,7,numComp,3)] = V11;
1225 } // end of component loop i
1226 } // end of k0 loop
1227 } // end of k1 loop
1228 } // end of k2 loop
1229 } // end of parallel section
1230 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1231 out.requireWrite();
1232 #pragma omp parallel
1233 {
1234 vector<double> f_000(numComp);
1235 vector<double> f_001(numComp);
1236 vector<double> f_010(numComp);
1237 vector<double> f_011(numComp);
1238 vector<double> f_100(numComp);
1239 vector<double> f_101(numComp);
1240 vector<double> f_110(numComp);
1241 vector<double> f_111(numComp);
1242 #pragma omp for
1243 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1244 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1245 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1246 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1247 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1248 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1249 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1250 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1251 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1252 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1253 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1254 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1255 for (index_t i=0; i < numComp; ++i) {
1256 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1257 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1258 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1259 } // end of component loop i
1260 } // end of k0 loop
1261 } // end of k1 loop
1262 } // end of k2 loop
1263 } // end of parallel section
1264 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1265 out.requireWrite();
1266 #pragma omp parallel
1267 {
1268 vector<double> f_000(numComp);
1269 vector<double> f_001(numComp);
1270 vector<double> f_010(numComp);
1271 vector<double> f_011(numComp);
1272 vector<double> f_100(numComp);
1273 vector<double> f_101(numComp);
1274 vector<double> f_110(numComp);
1275 vector<double> f_111(numComp);
1276 if (m_faceOffset[0] > -1) {
1277 #pragma omp for nowait
1278 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1279 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1280 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1281 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1282 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1283 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1284 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1285 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1286 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1287 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1288 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1289 for (index_t i=0; i < numComp; ++i) {
1290 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1291 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1292 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1293 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1294 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1295 o[INDEX3(i,1,0,numComp,3)] = V0;
1296 o[INDEX3(i,2,0,numComp,3)] = V2;
1297 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1298 o[INDEX3(i,1,1,numComp,3)] = V0;
1299 o[INDEX3(i,2,1,numComp,3)] = V3;
1300 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1301 o[INDEX3(i,1,2,numComp,3)] = V1;
1302 o[INDEX3(i,2,2,numComp,3)] = V2;
1303 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304 o[INDEX3(i,1,3,numComp,3)] = V1;
1305 o[INDEX3(i,2,3,numComp,3)] = V3;
1306 } // end of component loop i
1307 } // end of k1 loop
1308 } // end of k2 loop
1309 } // end of face 0
1310 if (m_faceOffset[1] > -1) {
1311 #pragma omp for nowait
1312 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1313 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1314 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1315 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1316 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1317 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1318 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1319 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1320 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1321 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1322 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1323 for (index_t i=0; i < numComp; ++i) {
1324 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1325 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1326 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1327 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1328 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1329 o[INDEX3(i,1,0,numComp,3)] = V0;
1330 o[INDEX3(i,2,0,numComp,3)] = V2;
1331 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1332 o[INDEX3(i,1,1,numComp,3)] = V0;
1333 o[INDEX3(i,2,1,numComp,3)] = V3;
1334 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1335 o[INDEX3(i,1,2,numComp,3)] = V1;
1336 o[INDEX3(i,2,2,numComp,3)] = V2;
1337 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1338 o[INDEX3(i,1,3,numComp,3)] = V1;
1339 o[INDEX3(i,2,3,numComp,3)] = V3;
1340 } // end of component loop i
1341 } // end of k1 loop
1342 } // end of k2 loop
1343 } // end of face 1
1344 if (m_faceOffset[2] > -1) {
1345 #pragma omp for nowait
1346 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1347 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1348 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1349 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1350 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1351 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1352 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1353 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1354 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1355 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1357 for (index_t i=0; i < numComp; ++i) {
1358 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1359 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1360 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1361 o[INDEX3(i,0,0,numComp,3)] = V0;
1362 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1363 o[INDEX3(i,2,0,numComp,3)] = V1;
1364 o[INDEX3(i,0,1,numComp,3)] = V0;
1365 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1366 o[INDEX3(i,2,1,numComp,3)] = V2;
1367 o[INDEX3(i,0,2,numComp,3)] = V0;
1368 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1369 o[INDEX3(i,2,2,numComp,3)] = V1;
1370 o[INDEX3(i,0,3,numComp,3)] = V0;
1371 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1372 o[INDEX3(i,2,3,numComp,3)] = V2;
1373 } // end of component loop i
1374 } // end of k0 loop
1375 } // end of k2 loop
1376 } // end of face 2
1377 if (m_faceOffset[3] > -1) {
1378 #pragma omp for nowait
1379 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1380 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1381 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1382 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1383 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1384 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1385 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1386 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1387 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1388 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1389 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1390 for (index_t i=0; i < numComp; ++i) {
1391 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1392 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1393 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1394 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1395 o[INDEX3(i,0,0,numComp,3)] = V0;
1396 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1397 o[INDEX3(i,2,0,numComp,3)] = V2;
1398 o[INDEX3(i,0,1,numComp,3)] = V0;
1399 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1400 o[INDEX3(i,2,1,numComp,3)] = V3;
1401 o[INDEX3(i,0,2,numComp,3)] = V1;
1402 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1403 o[INDEX3(i,2,2,numComp,3)] = V2;
1404 o[INDEX3(i,0,3,numComp,3)] = V1;
1405 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1406 o[INDEX3(i,2,3,numComp,3)] = V3;
1407 } // end of component loop i
1408 } // end of k0 loop
1409 } // end of k2 loop
1410 } // end of face 3
1411 if (m_faceOffset[4] > -1) {
1412 #pragma omp for nowait
1413 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1414 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1415 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1416 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1417 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1418 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1419 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1420 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1421 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1422 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1423 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1424 for (index_t i=0; i < numComp; ++i) {
1425 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1426 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1427 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1428 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1429 o[INDEX3(i,0,0,numComp,3)] = V0;
1430 o[INDEX3(i,1,0,numComp,3)] = V2;
1431 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1432 o[INDEX3(i,0,1,numComp,3)] = V0;
1433 o[INDEX3(i,1,1,numComp,3)] = V3;
1434 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1435 o[INDEX3(i,0,2,numComp,3)] = V1;
1436 o[INDEX3(i,1,2,numComp,3)] = V2;
1437 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1438 o[INDEX3(i,0,3,numComp,3)] = V1;
1439 o[INDEX3(i,1,3,numComp,3)] = V3;
1440 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1441 } // end of component loop i
1442 } // end of k0 loop
1443 } // end of k1 loop
1444 } // end of face 4
1445 if (m_faceOffset[5] > -1) {
1446 #pragma omp for nowait
1447 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1448 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1449 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1450 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1451 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1452 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1453 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1454 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1455 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1456 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1457 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1458 for (index_t i=0; i < numComp; ++i) {
1459 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1460 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1461 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1462 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1463 o[INDEX3(i,0,0,numComp,3)] = V0;
1464 o[INDEX3(i,1,0,numComp,3)] = V2;
1465 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1466 o[INDEX3(i,0,1,numComp,3)] = V0;
1467 o[INDEX3(i,1,1,numComp,3)] = V3;
1468 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1469 o[INDEX3(i,0,2,numComp,3)] = V1;
1470 o[INDEX3(i,1,2,numComp,3)] = V2;
1471 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1472 o[INDEX3(i,0,3,numComp,3)] = V1;
1473 o[INDEX3(i,1,3,numComp,3)] = V3;
1474 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1475 } // end of component loop i
1476 } // end of k0 loop
1477 } // end of k1 loop
1478 } // end of face 5
1479 } // end of parallel section
1480 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1481 out.requireWrite();
1482 #pragma omp parallel
1483 {
1484 vector<double> f_000(numComp);
1485 vector<double> f_001(numComp);
1486 vector<double> f_010(numComp);
1487 vector<double> f_011(numComp);
1488 vector<double> f_100(numComp);
1489 vector<double> f_101(numComp);
1490 vector<double> f_110(numComp);
1491 vector<double> f_111(numComp);
1492 if (m_faceOffset[0] > -1) {
1493 #pragma omp for nowait
1494 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1495 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1496 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1505 for (index_t i=0; i < numComp; ++i) {
1506 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1507 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1508 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1509 } // end of component loop i
1510 } // end of k1 loop
1511 } // end of k2 loop
1512 } // end of face 0
1513 if (m_faceOffset[1] > -1) {
1514 #pragma omp for nowait
1515 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1516 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1517 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1520 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1521 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1522 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1523 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1524 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1525 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1526 for (index_t i=0; i < numComp; ++i) {
1527 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1528 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1529 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1530 } // end of component loop i
1531 } // end of k1 loop
1532 } // end of k2 loop
1533 } // end of face 1
1534 if (m_faceOffset[2] > -1) {
1535 #pragma omp for nowait
1536 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1537 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1538 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1547 for (index_t i=0; i < numComp; ++i) {
1548 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1549 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1550 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1551 } // end of component loop i
1552 } // end of k0 loop
1553 } // end of k2 loop
1554 } // end of face 2
1555 if (m_faceOffset[3] > -1) {
1556 #pragma omp for nowait
1557 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1558 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1559 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1560 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1561 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1562 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1563 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1564 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1568 for (index_t i=0; i < numComp; ++i) {
1569 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1570 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1571 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1572 } // end of component loop i
1573 } // end of k0 loop
1574 } // end of k2 loop
1575 } // end of face 3
1576 if (m_faceOffset[4] > -1) {
1577 #pragma omp for nowait
1578 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1579 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1580 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1583 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1584 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1585 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1586 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1587 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1588 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1589 for (index_t i=0; i < numComp; ++i) {
1590 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1591 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1592 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1593 } // end of component loop i
1594 } // end of k0 loop
1595 } // end of k1 loop
1596 } // end of face 4
1597 if (m_faceOffset[5] > -1) {
1598 #pragma omp for nowait
1599 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1600 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1601 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1602 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1603 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1604 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1605 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1606 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1607 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1608 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1609 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1610 for (index_t i=0; i < numComp; ++i) {
1611 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1612 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1613 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1614 } // end of component loop i
1615 } // end of k0 loop
1616 } // end of k1 loop
1617 } // end of face 5
1618 } // end of parallel section
1619 }
1620 }
1621
1622 //protected
1623 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1624 {
1625 const dim_t numComp = arg.getDataPointSize();
1626 const index_t left = (m_offset[0]==0 ? 0 : 1);
1627 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1628 const index_t front = (m_offset[2]==0 ? 0 : 1);
1629 const int fs = arg.getFunctionSpace().getTypeCode();
1630 if (fs == Elements && arg.actsExpanded()) {
1631 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1632 #pragma omp parallel
1633 {
1634 vector<double> int_local(numComp, 0);
1635 #pragma omp for nowait
1636 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1637 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1638 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1639 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1640 for (index_t i=0; i < numComp; ++i) {
1641 const double f_0 = f[INDEX2(i,0,numComp)];
1642 const double f_1 = f[INDEX2(i,1,numComp)];
1643 const double f_2 = f[INDEX2(i,2,numComp)];
1644 const double f_3 = f[INDEX2(i,3,numComp)];
1645 const double f_4 = f[INDEX2(i,4,numComp)];
1646 const double f_5 = f[INDEX2(i,5,numComp)];
1647 const double f_6 = f[INDEX2(i,6,numComp)];
1648 const double f_7 = f[INDEX2(i,7,numComp)];
1649 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1650 } // end of component loop i
1651 } // end of k0 loop
1652 } // end of k1 loop
1653 } // end of k2 loop
1654
1655 #pragma omp critical
1656 for (index_t i=0; i<numComp; i++)
1657 integrals[i]+=int_local[i];
1658 } // end of parallel section
1659
1660 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1661 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1662 #pragma omp parallel
1663 {
1664 vector<double> int_local(numComp, 0);
1665 #pragma omp for nowait
1666 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1667 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1668 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1669 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1670 for (index_t i=0; i < numComp; ++i) {
1671 int_local[i]+=f[i]*w_0;
1672 } // end of component loop i
1673 } // end of k0 loop
1674 } // end of k1 loop
1675 } // end of k2 loop
1676
1677 #pragma omp critical
1678 for (index_t i=0; i<numComp; i++)
1679 integrals[i]+=int_local[i];
1680 } // end of parallel section
1681
1682 } else if (fs == FaceElements && arg.actsExpanded()) {
1683 const double w_0 = m_dx[1]*m_dx[2]/4.;
1684 const double w_1 = m_dx[0]*m_dx[2]/4.;
1685 const double w_2 = m_dx[0]*m_dx[1]/4.;
1686 #pragma omp parallel
1687 {
1688 vector<double> int_local(numComp, 0);
1689 if (m_faceOffset[0] > -1) {
1690 #pragma omp for nowait
1691 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1692 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1693 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1694 for (index_t i=0; i < numComp; ++i) {
1695 const double f_0 = f[INDEX2(i,0,numComp)];
1696 const double f_1 = f[INDEX2(i,1,numComp)];
1697 const double f_2 = f[INDEX2(i,2,numComp)];
1698 const double f_3 = f[INDEX2(i,3,numComp)];
1699 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1700 } // end of component loop i
1701 } // end of k1 loop
1702 } // end of k2 loop
1703 }
1704
1705 if (m_faceOffset[1] > -1) {
1706 #pragma omp for nowait
1707 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1708 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1709 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1710 for (index_t i=0; i < numComp; ++i) {
1711 const double f_0 = f[INDEX2(i,0,numComp)];
1712 const double f_1 = f[INDEX2(i,1,numComp)];
1713 const double f_2 = f[INDEX2(i,2,numComp)];
1714 const double f_3 = f[INDEX2(i,3,numComp)];
1715 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1716 } // end of component loop i
1717 } // end of k1 loop
1718 } // end of k2 loop
1719 }
1720
1721 if (m_faceOffset[2] > -1) {
1722 #pragma omp for nowait
1723 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1724 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1725 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1726 for (index_t i=0; i < numComp; ++i) {
1727 const double f_0 = f[INDEX2(i,0,numComp)];
1728 const double f_1 = f[INDEX2(i,1,numComp)];
1729 const double f_2 = f[INDEX2(i,2,numComp)];
1730 const double f_3 = f[INDEX2(i,3,numComp)];
1731 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1732 } // end of component loop i
1733 } // end of k1 loop
1734 } // end of k2 loop
1735 }
1736
1737 if (m_faceOffset[3] > -1) {
1738 #pragma omp for nowait
1739 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1740 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1741 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1742 for (index_t i=0; i < numComp; ++i) {
1743 const double f_0 = f[INDEX2(i,0,numComp)];
1744 const double f_1 = f[INDEX2(i,1,numComp)];
1745 const double f_2 = f[INDEX2(i,2,numComp)];
1746 const double f_3 = f[INDEX2(i,3,numComp)];
1747 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1748 } // end of component loop i
1749 } // end of k1 loop
1750 } // end of k2 loop
1751 }
1752
1753 if (m_faceOffset[4] > -1) {
1754 #pragma omp for nowait
1755 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1756 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1757 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1758 for (index_t i=0; i < numComp; ++i) {
1759 const double f_0 = f[INDEX2(i,0,numComp)];
1760 const double f_1 = f[INDEX2(i,1,numComp)];
1761 const double f_2 = f[INDEX2(i,2,numComp)];
1762 const double f_3 = f[INDEX2(i,3,numComp)];
1763 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1764 } // end of component loop i
1765 } // end of k1 loop
1766 } // end of k2 loop
1767 }
1768
1769 if (m_faceOffset[5] > -1) {
1770 #pragma omp for nowait
1771 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1772 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1773 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1774 for (index_t i=0; i < numComp; ++i) {
1775 const double f_0 = f[INDEX2(i,0,numComp)];
1776 const double f_1 = f[INDEX2(i,1,numComp)];
1777 const double f_2 = f[INDEX2(i,2,numComp)];
1778 const double f_3 = f[INDEX2(i,3,numComp)];
1779 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1780 } // end of component loop i
1781 } // end of k1 loop
1782 } // end of k2 loop
1783 }
1784
1785 #pragma omp critical
1786 for (index_t i=0; i<numComp; i++)
1787 integrals[i]+=int_local[i];
1788 } // end of parallel section
1789
1790 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1791 const double w_0 = m_dx[1]*m_dx[2];
1792 const double w_1 = m_dx[0]*m_dx[2];
1793 const double w_2 = m_dx[0]*m_dx[1];
1794 #pragma omp parallel
1795 {
1796 vector<double> int_local(numComp, 0);
1797 if (m_faceOffset[0] > -1) {
1798 #pragma omp for nowait
1799 for (index_t k2 = front; k2 < front+m_ownNE[2];