/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4650 - (show annotations)
Wed Feb 5 04:16:01 2014 UTC (5 years, 2 months ago) by jfenwick
File size: 154351 byte(s)
Fixed a spelling error and missing virtual (not sure that one matters).
Added _untested_ 3D gaussian smoothed random data


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Brick.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler3D.h>
20 #include <ripley/WaveAssembler3D.h>
21 #include <boost/scoped_array.hpp>
22
23 #ifdef USE_NETCDF
24 #include <netcdfcpp.h>
25 #endif
26
27 #if USE_SILO
28 #include <silo.h>
29 #ifdef ESYS_MPI
30 #include <pmpio.h>
31 #endif
32 #endif
33
34 #include <iomanip>
35
36 #include "esysUtils/EsysRandom.h"
37 #include "blocktools.h"
38
39
40 using namespace std;
41 using esysUtils::FileWriter;
42
43 namespace ripley {
44
45 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
46 double x1, double y1, double z1, int d0, int d1, int d2,
47 const std::vector<double>& points, const std::vector<int>& tags,
48 const simap_t& tagnamestonums) :
49 RipleyDomain(3)
50 {
51 // ignore subdivision parameters for serial run
52 if (m_mpiInfo->size == 1) {
53 d0=1;
54 d1=1;
55 d2=1;
56 }
57 bool warn=false;
58 // if number of subdivisions is non-positive, try to subdivide by the same
59 // ratio as the number of elements
60 if (d0<=0 && d1<=0 && d2<=0) {
61 warn=true;
62 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
63 d0=max(1, d0);
64 d1=max(1, (int)(d0*n1/(float)n0));
65 d2=m_mpiInfo->size/(d0*d1);
66 if (d0*d1*d2 != m_mpiInfo->size) {
67 // ratios not the same so leave "smallest" side undivided and try
68 // dividing 2 sides only
69 if (n0>=n1) {
70 if (n1>=n2) {
71 d0=d1=0;
72 d2=1;
73 } else {
74 d0=d2=0;
75 d1=1;
76 }
77 } else {
78 if (n0>=n2) {
79 d0=d1=0;
80 d2=1;
81 } else {
82 d0=1;
83 d1=d2=0;
84 }
85 }
86 }
87 }
88 if (d0<=0 && d1<=0) {
89 warn=true;
90 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
91 d1=m_mpiInfo->size/d0;
92 if (d0*d1*d2 != m_mpiInfo->size) {
93 // ratios not the same so subdivide side with more elements only
94 if (n0>n1) {
95 d0=0;
96 d1=1;
97 } else {
98 d0=1;
99 d1=0;
100 }
101 }
102 } else if (d0<=0 && d2<=0) {
103 warn=true;
104 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
105 d2=m_mpiInfo->size/d0;
106 if (d0*d1*d2 != m_mpiInfo->size) {
107 // ratios not the same so subdivide side with more elements only
108 if (n0>n2) {
109 d0=0;
110 d2=1;
111 } else {
112 d0=1;
113 d2=0;
114 }
115 }
116 } else if (d1<=0 && d2<=0) {
117 warn=true;
118 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
119 d2=m_mpiInfo->size/d1;
120 if (d0*d1*d2 != m_mpiInfo->size) {
121 // ratios not the same so subdivide side with more elements only
122 if (n1>n2) {
123 d1=0;
124 d2=1;
125 } else {
126 d1=1;
127 d2=0;
128 }
129 }
130 }
131 if (d0<=0) {
132 // d1,d2 are preset, determine d0
133 d0=m_mpiInfo->size/(d1*d2);
134 } else if (d1<=0) {
135 // d0,d2 are preset, determine d1
136 d1=m_mpiInfo->size/(d0*d2);
137 } else if (d2<=0) {
138 // d0,d1 are preset, determine d2
139 d2=m_mpiInfo->size/(d0*d1);
140 }
141
142 // ensure number of subdivisions is valid and nodes can be distributed
143 // among number of ranks
144 if (d0*d1*d2 != m_mpiInfo->size)
145 throw RipleyException("Invalid number of spatial subdivisions");
146
147 if (warn) {
148 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
149 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
150 }
151
152 double l0 = x1-x0;
153 double l1 = y1-y0;
154 double l2 = z1-z0;
155 m_dx[0] = l0/n0;
156 m_dx[1] = l1/n1;
157 m_dx[2] = l2/n2;
158
159 if ((n0+1)%d0 > 0) {
160 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
161 l0=m_dx[0]*n0;
162 cout << "Warning: Adjusted number of elements and length. N0="
163 << n0 << ", l0=" << l0 << endl;
164 }
165 if ((n1+1)%d1 > 0) {
166 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
167 l1=m_dx[1]*n1;
168 cout << "Warning: Adjusted number of elements and length. N1="
169 << n1 << ", l1=" << l1 << endl;
170 }
171 if ((n2+1)%d2 > 0) {
172 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
173 l2=m_dx[2]*n2;
174 cout << "Warning: Adjusted number of elements and length. N2="
175 << n2 << ", l2=" << l2 << endl;
176 }
177
178 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
179 throw RipleyException("Too few elements for the number of ranks");
180
181 m_gNE[0] = n0;
182 m_gNE[1] = n1;
183 m_gNE[2] = n2;
184 m_origin[0] = x0;
185 m_origin[1] = y0;
186 m_origin[2] = z0;
187 m_length[0] = l0;
188 m_length[1] = l1;
189 m_length[2] = l2;
190 m_NX[0] = d0;
191 m_NX[1] = d1;
192 m_NX[2] = d2;
193
194 // local number of elements (including overlap)
195 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
196 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
197 m_NE[0]++;
198 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
199 m_ownNE[0]--;
200
201 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
202 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
203 m_NE[1]++;
204 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
205 m_ownNE[1]--;
206
207 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
208 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
209 m_NE[2]++;
210 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
211 m_ownNE[2]--;
212
213 // local number of nodes
214 m_NN[0] = m_NE[0]+1;
215 m_NN[1] = m_NE[1]+1;
216 m_NN[2] = m_NE[2]+1;
217
218 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
219 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
220 if (m_offset[0] > 0)
221 m_offset[0]--;
222 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
223 if (m_offset[1] > 0)
224 m_offset[1]--;
225 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
226 if (m_offset[2] > 0)
227 m_offset[2]--;
228
229 populateSampleIds();
230 createPattern();
231
232 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
233 for (map<string, int>::const_iterator i = tagnamestonums.begin();
234 i != tagnamestonums.end(); i++) {
235 setTagMap(i->first, i->second);
236 }
237 addPoints(tags.size(), &points[0], &tags[0]);
238 }
239
240
241 Brick::~Brick()
242 {
243 Paso_SystemMatrixPattern_free(m_pattern);
244 Paso_Connector_free(m_connector);
245 delete assembler;
246 }
247
248 string Brick::getDescription() const
249 {
250 return "ripley::Brick";
251 }
252
253 bool Brick::operator==(const AbstractDomain& other) const
254 {
255 const Brick* o=dynamic_cast<const Brick*>(&other);
256 if (o) {
257 return (RipleyDomain::operator==(other) &&
258 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
259 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
260 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
261 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
262 }
263
264 return false;
265 }
266
267 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
268 const ReaderParameters& params) const
269 {
270 #ifdef USE_NETCDF
271 // check destination function space
272 int myN0, myN1, myN2;
273 if (out.getFunctionSpace().getTypeCode() == Nodes) {
274 myN0 = m_NN[0];
275 myN1 = m_NN[1];
276 myN2 = m_NN[2];
277 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
278 out.getFunctionSpace().getTypeCode() == ReducedElements) {
279 myN0 = m_NE[0];
280 myN1 = m_NE[1];
281 myN2 = m_NE[2];
282 } else
283 throw RipleyException("readNcGrid(): invalid function space for output data object");
284
285 if (params.first.size() != 3)
286 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
287
288 if (params.numValues.size() != 3)
289 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
290
291 if (params.multiplier.size() != 3)
292 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
293 for (size_t i=0; i<params.multiplier.size(); i++)
294 if (params.multiplier[i]<1)
295 throw RipleyException("readNcGrid(): all multipliers must be positive");
296
297 // check file existence and size
298 NcFile f(filename.c_str(), NcFile::ReadOnly);
299 if (!f.is_valid())
300 throw RipleyException("readNcGrid(): cannot open file");
301
302 NcVar* var = f.get_var(varname.c_str());
303 if (!var)
304 throw RipleyException("readNcGrid(): invalid variable name");
305
306 // TODO: rank>0 data support
307 const int numComp = out.getDataPointSize();
308 if (numComp > 1)
309 throw RipleyException("readNcGrid(): only scalar data supported");
310
311 const int dims = var->num_dims();
312 boost::scoped_array<long> edges(var->edges());
313
314 // is this a slice of the data object (dims!=3)?
315 // note the expected ordering of edges (as in numpy: z,y,x)
316 if ( (dims==3 && (params.numValues[2] > edges[0] ||
317 params.numValues[1] > edges[1] ||
318 params.numValues[0] > edges[2]))
319 || (dims==2 && params.numValues[2]>1)
320 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
321 throw RipleyException("readNcGrid(): not enough data in file");
322 }
323
324 // check if this rank contributes anything
325 if (params.first[0] >= m_offset[0]+myN0 ||
326 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
327 params.first[1] >= m_offset[1]+myN1 ||
328 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
329 params.first[2] >= m_offset[2]+myN2 ||
330 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
331 return;
332 }
333
334 // now determine how much this rank has to write
335
336 // first coordinates in data object to write to
337 const int first0 = max(0, params.first[0]-m_offset[0]);
338 const int first1 = max(0, params.first[1]-m_offset[1]);
339 const int first2 = max(0, params.first[2]-m_offset[2]);
340 // indices to first value in file (not accounting for reverse yet)
341 int idx0 = max(0, m_offset[0]-params.first[0]);
342 int idx1 = max(0, m_offset[1]-params.first[1]);
343 int idx2 = max(0, m_offset[2]-params.first[2]);
344 // number of values to read
345 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
346 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
347 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
348
349 // make sure we read the right block if going backwards through file
350 if (params.reverse[0])
351 idx0 = edges[dims-1]-num0-idx0;
352 if (dims>1 && params.reverse[1])
353 idx1 = edges[dims-2]-num1-idx1;
354 if (dims>2 && params.reverse[2])
355 idx2 = edges[dims-3]-num2-idx2;
356
357
358 vector<double> values(num0*num1*num2);
359 if (dims==3) {
360 var->set_cur(idx2, idx1, idx0);
361 var->get(&values[0], num2, num1, num0);
362 } else if (dims==2) {
363 var->set_cur(idx1, idx0);
364 var->get(&values[0], num1, num0);
365 } else {
366 var->set_cur(idx0);
367 var->get(&values[0], num0);
368 }
369
370 const int dpp = out.getNumDataPointsPerSample();
371 out.requireWrite();
372
373 // helpers for reversing
374 const int x0 = (params.reverse[0] ? num0-1 : 0);
375 const int x_mult = (params.reverse[0] ? -1 : 1);
376 const int y0 = (params.reverse[1] ? num1-1 : 0);
377 const int y_mult = (params.reverse[1] ? -1 : 1);
378 const int z0 = (params.reverse[2] ? num2-1 : 0);
379 const int z_mult = (params.reverse[2] ? -1 : 1);
380
381 for (index_t z=0; z<num2; z++) {
382 for (index_t y=0; y<num1; y++) {
383 #pragma omp parallel for
384 for (index_t x=0; x<num0; x++) {
385 const int baseIndex = first0+x*params.multiplier[0]
386 +(first1+y*params.multiplier[1])*myN0
387 +(first2+z*params.multiplier[2])*myN0*myN1;
388 const int srcIndex=(z0+z_mult*z)*num1*num0
389 +(y0+y_mult*y)*num0
390 +(x0+x_mult*x);
391 if (!isnan(values[srcIndex])) {
392 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
393 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
394 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
395 const int dataIndex = baseIndex+m0
396 +m1*myN0
397 +m2*myN0*myN1;
398 double* dest = out.getSampleDataRW(dataIndex);
399 for (index_t q=0; q<dpp; q++) {
400 *dest++ = values[srcIndex];
401 }
402 }
403 }
404 }
405 }
406 }
407 }
408 }
409 #else
410 throw RipleyException("readNcGrid(): not compiled with netCDF support");
411 #endif
412 }
413
414 void Brick::readBinaryGrid(escript::Data& out, string filename,
415 const ReaderParameters& params) const
416 {
417 // the mapping is not universally correct but should work on our
418 // supported platforms
419 switch (params.dataType) {
420 case DATATYPE_INT32:
421 readBinaryGridImpl<int>(out, filename, params);
422 break;
423 case DATATYPE_FLOAT32:
424 readBinaryGridImpl<float>(out, filename, params);
425 break;
426 case DATATYPE_FLOAT64:
427 readBinaryGridImpl<double>(out, filename, params);
428 break;
429 default:
430 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
431 }
432 }
433
434 template<typename ValueType>
435 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
436 const ReaderParameters& params) const
437 {
438 // check destination function space
439 int myN0, myN1, myN2;
440 if (out.getFunctionSpace().getTypeCode() == Nodes) {
441 myN0 = m_NN[0];
442 myN1 = m_NN[1];
443 myN2 = m_NN[2];
444 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
445 out.getFunctionSpace().getTypeCode() == ReducedElements) {
446 myN0 = m_NE[0];
447 myN1 = m_NE[1];
448 myN2 = m_NE[2];
449 } else
450 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
451
452 if (params.first.size() != 3)
453 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
454
455 if (params.numValues.size() != 3)
456 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
457
458 if (params.multiplier.size() != 3)
459 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
460 for (size_t i=0; i<params.multiplier.size(); i++)
461 if (params.multiplier[i]<1)
462 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
463
464 // check file existence and size
465 ifstream f(filename.c_str(), ifstream::binary);
466 if (f.fail()) {
467 throw RipleyException("readBinaryGrid(): cannot open file");
468 }
469 f.seekg(0, ios::end);
470 const int numComp = out.getDataPointSize();
471 const int filesize = f.tellg();
472 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
473 if (filesize < reqsize) {
474 f.close();
475 throw RipleyException("readBinaryGrid(): not enough data in file");
476 }
477
478 // check if this rank contributes anything
479 if (params.first[0] >= m_offset[0]+myN0 ||
480 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
481 params.first[1] >= m_offset[1]+myN1 ||
482 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
483 params.first[2] >= m_offset[2]+myN2 ||
484 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
485 f.close();
486 return;
487 }
488
489 // now determine how much this rank has to write
490
491 // first coordinates in data object to write to
492 const int first0 = max(0, params.first[0]-m_offset[0]);
493 const int first1 = max(0, params.first[1]-m_offset[1]);
494 const int first2 = max(0, params.first[2]-m_offset[2]);
495 // indices to first value in file
496 const int idx0 = max(0, m_offset[0]-params.first[0]);
497 const int idx1 = max(0, m_offset[1]-params.first[1]);
498 const int idx2 = max(0, m_offset[2]-params.first[2]);
499 // number of values to read
500 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
501 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
502 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
503
504 out.requireWrite();
505 vector<ValueType> values(num0*numComp);
506 const int dpp = out.getNumDataPointsPerSample();
507
508 for (int z=0; z<num2; z++) {
509 for (int y=0; y<num1; y++) {
510 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
511 +(idx2+z)*params.numValues[0]*params.numValues[1]);
512 f.seekg(fileofs*sizeof(ValueType));
513 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
514
515 for (int x=0; x<num0; x++) {
516 const int baseIndex = first0+x*params.multiplier[0]
517 +(first1+y*params.multiplier[1])*myN0
518 +(first2+z*params.multiplier[2])*myN0*myN1;
519 for (int m2=0; m2<params.multiplier[2]; m2++) {
520 for (int m1=0; m1<params.multiplier[1]; m1++) {
521 for (int m0=0; m0<params.multiplier[0]; m0++) {
522 const int dataIndex = baseIndex+m0
523 +m1*myN0
524 +m2*myN0*myN1;
525 double* dest = out.getSampleDataRW(dataIndex);
526 for (int c=0; c<numComp; c++) {
527 ValueType val = values[x*numComp+c];
528
529 if (params.byteOrder != BYTEORDER_NATIVE) {
530 char* cval = reinterpret_cast<char*>(&val);
531 // this will alter val!!
532 byte_swap32(cval);
533 }
534 if (!std::isnan(val)) {
535 for (int q=0; q<dpp; q++) {
536 *dest++ = static_cast<double>(val);
537 }
538 }
539 }
540 }
541 }
542 }
543 }
544 }
545 }
546
547 f.close();
548 }
549
550 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
551 int byteOrder, int dataType) const
552 {
553 // the mapping is not universally correct but should work on our
554 // supported platforms
555 switch (dataType) {
556 case DATATYPE_INT32:
557 writeBinaryGridImpl<int>(in, filename, byteOrder);
558 break;
559 case DATATYPE_FLOAT32:
560 writeBinaryGridImpl<float>(in, filename, byteOrder);
561 break;
562 case DATATYPE_FLOAT64:
563 writeBinaryGridImpl<double>(in, filename, byteOrder);
564 break;
565 default:
566 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
567 }
568 }
569
570 template<typename ValueType>
571 void Brick::writeBinaryGridImpl(const escript::Data& in,
572 const string& filename, int byteOrder) const
573 {
574 // check function space and determine number of points
575 int myN0, myN1, myN2;
576 int totalN0, totalN1, totalN2;
577 if (in.getFunctionSpace().getTypeCode() == Nodes) {
578 myN0 = m_NN[0];
579 myN1 = m_NN[1];
580 myN2 = m_NN[2];
581 totalN0 = m_gNE[0]+1;
582 totalN1 = m_gNE[1]+1;
583 totalN2 = m_gNE[2]+1;
584 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
585 in.getFunctionSpace().getTypeCode() == ReducedElements) {
586 myN0 = m_NE[0];
587 myN1 = m_NE[1];
588 myN2 = m_NE[2];
589 totalN0 = m_gNE[0];
590 totalN1 = m_gNE[1];
591 totalN2 = m_gNE[2];
592 } else
593 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
594
595 const int numComp = in.getDataPointSize();
596 const int dpp = in.getNumDataPointsPerSample();
597 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
598
599 if (numComp > 1 || dpp > 1)
600 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
601
602 // from here on we know that each sample consists of one value
603 FileWriter fw;
604 fw.openFile(filename, fileSize);
605 MPIBarrier();
606
607 for (index_t z=0; z<myN2; z++) {
608 for (index_t y=0; y<myN1; y++) {
609 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
610 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
611 ostringstream oss;
612
613 for (index_t x=0; x<myN0; x++) {
614 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
615 ValueType fvalue = static_cast<ValueType>(*sample);
616 if (byteOrder == BYTEORDER_NATIVE) {
617 oss.write((char*)&fvalue, sizeof(fvalue));
618 } else {
619 char* value = reinterpret_cast<char*>(&fvalue);
620 oss.write(byte_swap32(value), sizeof(fvalue));
621 }
622 }
623 fw.writeAt(oss, fileofs);
624 }
625 }
626 fw.close();
627 }
628
629 void Brick::dump(const string& fileName) const
630 {
631 #if USE_SILO
632 string fn(fileName);
633 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
634 fn+=".silo";
635 }
636
637 int driver=DB_HDF5;
638 string siloPath;
639 DBfile* dbfile = NULL;
640
641 #ifdef ESYS_MPI
642 PMPIO_baton_t* baton = NULL;
643 const int NUM_SILO_FILES = 1;
644 const char* blockDirFmt = "/block%04d";
645 #endif
646
647 if (m_mpiInfo->size > 1) {
648 #ifdef ESYS_MPI
649 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
650 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
651 PMPIO_DefaultClose, (void*)&driver);
652 // try the fallback driver in case of error
653 if (!baton && driver != DB_PDB) {
654 driver = DB_PDB;
655 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
656 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
657 PMPIO_DefaultClose, (void*)&driver);
658 }
659 if (baton) {
660 char str[64];
661 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
662 siloPath = str;
663 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
664 }
665 #endif
666 } else {
667 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
668 getDescription().c_str(), driver);
669 // try the fallback driver in case of error
670 if (!dbfile && driver != DB_PDB) {
671 driver = DB_PDB;
672 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
673 getDescription().c_str(), driver);
674 }
675 }
676
677 if (!dbfile)
678 throw RipleyException("dump: Could not create Silo file");
679
680 /*
681 if (driver==DB_HDF5) {
682 // gzip level 1 already provides good compression with minimal
683 // performance penalty. Some tests showed that gzip levels >3 performed
684 // rather badly on escript data both in terms of time and space
685 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
686 }
687 */
688
689 boost::scoped_ptr<double> x(new double[m_NN[0]]);
690 boost::scoped_ptr<double> y(new double[m_NN[1]]);
691 boost::scoped_ptr<double> z(new double[m_NN[2]]);
692 double* coords[3] = { x.get(), y.get(), z.get() };
693 #pragma omp parallel
694 {
695 #pragma omp for
696 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
697 coords[0][i0]=getLocalCoordinate(i0, 0);
698 }
699 #pragma omp for
700 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
701 coords[1][i1]=getLocalCoordinate(i1, 1);
702 }
703 #pragma omp for
704 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
705 coords[2][i2]=getLocalCoordinate(i2, 2);
706 }
707 }
708 int* dims = const_cast<int*>(getNumNodesPerDim());
709
710 // write mesh
711 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
712 DB_COLLINEAR, NULL);
713
714 // write node ids
715 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
716 NULL, 0, DB_INT, DB_NODECENT, NULL);
717
718 // write element ids
719 dims = const_cast<int*>(getNumElementsPerDim());
720 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
721 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
722
723 // rank 0 writes multimesh and multivar
724 if (m_mpiInfo->rank == 0) {
725 vector<string> tempstrings;
726 vector<char*> names;
727 for (dim_t i=0; i<m_mpiInfo->size; i++) {
728 stringstream path;
729 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
730 tempstrings.push_back(path.str());
731 names.push_back((char*)tempstrings.back().c_str());
732 }
733 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
734 DBSetDir(dbfile, "/");
735 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
736 &types[0], NULL);
737 tempstrings.clear();
738 names.clear();
739 for (dim_t i=0; i<m_mpiInfo->size; i++) {
740 stringstream path;
741 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
742 tempstrings.push_back(path.str());
743 names.push_back((char*)tempstrings.back().c_str());
744 }
745 types.assign(m_mpiInfo->size, DB_QUADVAR);
746 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
747 &types[0], NULL);
748 tempstrings.clear();
749 names.clear();
750 for (dim_t i=0; i<m_mpiInfo->size; i++) {
751 stringstream path;
752 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
753 tempstrings.push_back(path.str());
754 names.push_back((char*)tempstrings.back().c_str());
755 }
756 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
757 &types[0], NULL);
758 }
759
760 if (m_mpiInfo->size > 1) {
761 #ifdef ESYS_MPI
762 PMPIO_HandOffBaton(baton, dbfile);
763 PMPIO_Finish(baton);
764 #endif
765 } else {
766 DBClose(dbfile);
767 }
768
769 #else // USE_SILO
770 throw RipleyException("dump: no Silo support");
771 #endif
772 }
773
774 const int* Brick::borrowSampleReferenceIDs(int fsType) const
775 {
776 switch (fsType) {
777 case Nodes:
778 case ReducedNodes: //FIXME: reduced
779 return &m_nodeId[0];
780 case DegreesOfFreedom:
781 case ReducedDegreesOfFreedom: //FIXME: reduced
782 return &m_dofId[0];
783 case Elements:
784 case ReducedElements:
785 return &m_elementId[0];
786 case FaceElements:
787 case ReducedFaceElements:
788 return &m_faceId[0];
789 default:
790 break;
791 }
792
793 stringstream msg;
794 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
795 throw RipleyException(msg.str());
796 }
797
798 bool Brick::ownSample(int fsType, index_t id) const
799 {
800 if (getMPISize()==1)
801 return true;
802
803 switch (fsType) {
804 case Nodes:
805 case ReducedNodes: //FIXME: reduced
806 return (m_dofMap[id] < getNumDOF());
807 case DegreesOfFreedom:
808 case ReducedDegreesOfFreedom:
809 return true;
810 case Elements:
811 case ReducedElements:
812 {
813 // check ownership of element's _last_ node
814 const index_t x=id%m_NE[0] + 1;
815 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
816 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
817 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
818 }
819 case FaceElements:
820 case ReducedFaceElements:
821 {
822 // check ownership of face element's last node
823 dim_t n=0;
824 for (size_t i=0; i<6; i++) {
825 n+=m_faceCount[i];
826 if (id<n) {
827 const index_t j=id-n+m_faceCount[i];
828 if (i>=4) { // front or back
829 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
830 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
831 } else if (i>=2) { // bottom or top
832 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
833 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
834 } else { // left or right
835 const index_t first=(i==0 ? 0 : m_NN[0]-1);
836 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
837 }
838 }
839 }
840 return false;
841 }
842 default:
843 break;
844 }
845
846 stringstream msg;
847 msg << "ownSample: invalid function space type " << fsType;
848 throw RipleyException(msg.str());
849 }
850
851 void Brick::setToNormal(escript::Data& out) const
852 {
853 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
854 out.requireWrite();
855 #pragma omp parallel
856 {
857 if (m_faceOffset[0] > -1) {
858 #pragma omp for nowait
859 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
860 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
861 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
862 // set vector at four quadrature points
863 *o++ = -1.; *o++ = 0.; *o++ = 0.;
864 *o++ = -1.; *o++ = 0.; *o++ = 0.;
865 *o++ = -1.; *o++ = 0.; *o++ = 0.;
866 *o++ = -1.; *o++ = 0.; *o = 0.;
867 }
868 }
869 }
870
871 if (m_faceOffset[1] > -1) {
872 #pragma omp for nowait
873 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
874 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
875 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
876 // set vector at four quadrature points
877 *o++ = 1.; *o++ = 0.; *o++ = 0.;
878 *o++ = 1.; *o++ = 0.; *o++ = 0.;
879 *o++ = 1.; *o++ = 0.; *o++ = 0.;
880 *o++ = 1.; *o++ = 0.; *o = 0.;
881 }
882 }
883 }
884
885 if (m_faceOffset[2] > -1) {
886 #pragma omp for nowait
887 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
888 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
889 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
890 // set vector at four quadrature points
891 *o++ = 0.; *o++ = -1.; *o++ = 0.;
892 *o++ = 0.; *o++ = -1.; *o++ = 0.;
893 *o++ = 0.; *o++ = -1.; *o++ = 0.;
894 *o++ = 0.; *o++ = -1.; *o = 0.;
895 }
896 }
897 }
898
899 if (m_faceOffset[3] > -1) {
900 #pragma omp for nowait
901 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
902 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
903 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
904 // set vector at four quadrature points
905 *o++ = 0.; *o++ = 1.; *o++ = 0.;
906 *o++ = 0.; *o++ = 1.; *o++ = 0.;
907 *o++ = 0.; *o++ = 1.; *o++ = 0.;
908 *o++ = 0.; *o++ = 1.; *o = 0.;
909 }
910 }
911 }
912
913 if (m_faceOffset[4] > -1) {
914 #pragma omp for nowait
915 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
916 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
917 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
918 // set vector at four quadrature points
919 *o++ = 0.; *o++ = 0.; *o++ = -1.;
920 *o++ = 0.; *o++ = 0.; *o++ = -1.;
921 *o++ = 0.; *o++ = 0.; *o++ = -1.;
922 *o++ = 0.; *o++ = 0.; *o = -1.;
923 }
924 }
925 }
926
927 if (m_faceOffset[5] > -1) {
928 #pragma omp for nowait
929 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
930 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
931 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
932 // set vector at four quadrature points
933 *o++ = 0.; *o++ = 0.; *o++ = 1.;
934 *o++ = 0.; *o++ = 0.; *o++ = 1.;
935 *o++ = 0.; *o++ = 0.; *o++ = 1.;
936 *o++ = 0.; *o++ = 0.; *o = 1.;
937 }
938 }
939 }
940 } // end of parallel section
941 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
942 out.requireWrite();
943 #pragma omp parallel
944 {
945 if (m_faceOffset[0] > -1) {
946 #pragma omp for nowait
947 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
948 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
949 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
950 *o++ = -1.;
951 *o++ = 0.;
952 *o = 0.;
953 }
954 }
955 }
956
957 if (m_faceOffset[1] > -1) {
958 #pragma omp for nowait
959 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
960 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
961 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
962 *o++ = 1.;
963 *o++ = 0.;
964 *o = 0.;
965 }
966 }
967 }
968
969 if (m_faceOffset[2] > -1) {
970 #pragma omp for nowait
971 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
972 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
973 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
974 *o++ = 0.;
975 *o++ = -1.;
976 *o = 0.;
977 }
978 }
979 }
980
981 if (m_faceOffset[3] > -1) {
982 #pragma omp for nowait
983 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
984 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
985 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
986 *o++ = 0.;
987 *o++ = 1.;
988 *o = 0.;
989 }
990 }
991 }
992
993 if (m_faceOffset[4] > -1) {
994 #pragma omp for nowait
995 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
996 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
997 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
998 *o++ = 0.;
999 *o++ = 0.;
1000 *o = -1.;
1001 }
1002 }
1003 }
1004
1005 if (m_faceOffset[5] > -1) {
1006 #pragma omp for nowait
1007 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1008 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1009 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1010 *o++ = 0.;
1011 *o++ = 0.;
1012 *o = 1.;
1013 }
1014 }
1015 }
1016 } // end of parallel section
1017
1018 } else {
1019 stringstream msg;
1020 msg << "setToNormal: invalid function space type "
1021 << out.getFunctionSpace().getTypeCode();
1022 throw RipleyException(msg.str());
1023 }
1024 }
1025
1026 void Brick::setToSize(escript::Data& out) const
1027 {
1028 if (out.getFunctionSpace().getTypeCode() == Elements
1029 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1030 out.requireWrite();
1031 const dim_t numQuad=out.getNumDataPointsPerSample();
1032 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1033 #pragma omp parallel for
1034 for (index_t k = 0; k < getNumElements(); ++k) {
1035 double* o = out.getSampleDataRW(k);
1036 fill(o, o+numQuad, size);
1037 }
1038 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1039 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1040 out.requireWrite();
1041 const dim_t numQuad=out.getNumDataPointsPerSample();
1042 #pragma omp parallel
1043 {
1044 if (m_faceOffset[0] > -1) {
1045 const double size=min(m_dx[1],m_dx[2]);
1046 #pragma omp for nowait
1047 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1048 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1049 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1050 fill(o, o+numQuad, size);
1051 }
1052 }
1053 }
1054
1055 if (m_faceOffset[1] > -1) {
1056 const double size=min(m_dx[1],m_dx[2]);
1057 #pragma omp for nowait
1058 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1059 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1060 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1061 fill(o, o+numQuad, size);
1062 }
1063 }
1064 }
1065
1066 if (m_faceOffset[2] > -1) {
1067 const double size=min(m_dx[0],m_dx[2]);
1068 #pragma omp for nowait
1069 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1070 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1071 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1072 fill(o, o+numQuad, size);
1073 }
1074 }
1075 }
1076
1077 if (m_faceOffset[3] > -1) {
1078 const double size=min(m_dx[0],m_dx[2]);
1079 #pragma omp for nowait
1080 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1081 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1082 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1083 fill(o, o+numQuad, size);
1084 }
1085 }
1086 }
1087
1088 if (m_faceOffset[4] > -1) {
1089 const double size=min(m_dx[0],m_dx[1]);
1090 #pragma omp for nowait
1091 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1092 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1093 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1094 fill(o, o+numQuad, size);
1095 }
1096 }
1097 }
1098
1099 if (m_faceOffset[5] > -1) {
1100 const double size=min(m_dx[0],m_dx[1]);
1101 #pragma omp for nowait
1102 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1103 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1104 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1105 fill(o, o+numQuad, size);
1106 }
1107 }
1108 }
1109 } // end of parallel section
1110
1111 } else {
1112 stringstream msg;
1113 msg << "setToSize: invalid function space type "
1114 << out.getFunctionSpace().getTypeCode();
1115 throw RipleyException(msg.str());
1116 }
1117 }
1118
1119 void Brick::Print_Mesh_Info(const bool full) const
1120 {
1121 RipleyDomain::Print_Mesh_Info(full);
1122 if (full) {
1123 cout << " Id Coordinates" << endl;
1124 cout.precision(15);
1125 cout.setf(ios::scientific, ios::floatfield);
1126 for (index_t i=0; i < getNumNodes(); i++) {
1127 cout << " " << setw(5) << m_nodeId[i]
1128 << " " << getLocalCoordinate(i%m_NN[0], 0)
1129 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1130 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1131 }
1132 }
1133 }
1134
1135
1136 //protected
1137 void Brick::assembleCoordinates(escript::Data& arg) const
1138 {
1139 escriptDataC x = arg.getDataC();
1140 int numDim = m_numDim;
1141 if (!isDataPointShapeEqual(&x, 1, &numDim))
1142 throw RipleyException("setToX: Invalid Data object shape");
1143 if (!numSamplesEqual(&x, 1, getNumNodes()))
1144 throw RipleyException("setToX: Illegal number of samples in Data object");
1145
1146 arg.requireWrite();
1147 #pragma omp parallel for
1148 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1149 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1150 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1151 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1152 point[0] = getLocalCoordinate(i0, 0);
1153 point[1] = getLocalCoordinate(i1, 1);
1154 point[2] = getLocalCoordinate(i2, 2);
1155 }
1156 }
1157 }
1158 }
1159
1160 //protected
1161 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1162 {
1163 const dim_t numComp = in.getDataPointSize();
1164 const double C0 = .044658198738520451079;
1165 const double C1 = .16666666666666666667;
1166 const double C2 = .21132486540518711775;
1167 const double C3 = .25;
1168 const double C4 = .5;
1169 const double C5 = .62200846792814621559;
1170 const double C6 = .78867513459481288225;
1171
1172 if (out.getFunctionSpace().getTypeCode() == Elements) {
1173 out.requireWrite();
1174 #pragma omp parallel
1175 {
1176 vector<double> f_000(numComp);
1177 vector<double> f_001(numComp);
1178 vector<double> f_010(numComp);
1179 vector<double> f_011(numComp);
1180 vector<double> f_100(numComp);
1181 vector<double> f_101(numComp);
1182 vector<double> f_110(numComp);
1183 vector<double> f_111(numComp);
1184 #pragma omp for
1185 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1186 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1187 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1188 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1189 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1197 for (index_t i=0; i < numComp; ++i) {
1198 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1199 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1200 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1201 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1202 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1203 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1204 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1205 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1206 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1207 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1208 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1209 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1210 o[INDEX3(i,0,0,numComp,3)] = V0;
1211 o[INDEX3(i,1,0,numComp,3)] = V4;
1212 o[INDEX3(i,2,0,numComp,3)] = V8;
1213 o[INDEX3(i,0,1,numComp,3)] = V0;
1214 o[INDEX3(i,1,1,numComp,3)] = V5;
1215 o[INDEX3(i,2,1,numComp,3)] = V9;
1216 o[INDEX3(i,0,2,numComp,3)] = V1;
1217 o[INDEX3(i,1,2,numComp,3)] = V4;
1218 o[INDEX3(i,2,2,numComp,3)] = V10;
1219 o[INDEX3(i,0,3,numComp,3)] = V1;
1220 o[INDEX3(i,1,3,numComp,3)] = V5;
1221 o[INDEX3(i,2,3,numComp,3)] = V11;
1222 o[INDEX3(i,0,4,numComp,3)] = V2;
1223 o[INDEX3(i,1,4,numComp,3)] = V6;
1224 o[INDEX3(i,2,4,numComp,3)] = V8;
1225 o[INDEX3(i,0,5,numComp,3)] = V2;
1226 o[INDEX3(i,1,5,numComp,3)] = V7;
1227 o[INDEX3(i,2,5,numComp,3)] = V9;
1228 o[INDEX3(i,0,6,numComp,3)] = V3;
1229 o[INDEX3(i,1,6,numComp,3)] = V6;
1230 o[INDEX3(i,2,6,numComp,3)] = V10;
1231 o[INDEX3(i,0,7,numComp,3)] = V3;
1232 o[INDEX3(i,1,7,numComp,3)] = V7;
1233 o[INDEX3(i,2,7,numComp,3)] = V11;
1234 } // end of component loop i
1235 } // end of k0 loop
1236 } // end of k1 loop
1237 } // end of k2 loop
1238 } // end of parallel section
1239 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1240 out.requireWrite();
1241 #pragma omp parallel
1242 {
1243 vector<double> f_000(numComp);
1244 vector<double> f_001(numComp);
1245 vector<double> f_010(numComp);
1246 vector<double> f_011(numComp);
1247 vector<double> f_100(numComp);
1248 vector<double> f_101(numComp);
1249 vector<double> f_110(numComp);
1250 vector<double> f_111(numComp);
1251 #pragma omp for
1252 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1253 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1254 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1255 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1264 for (index_t i=0; i < numComp; ++i) {
1265 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1266 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1267 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1268 } // end of component loop i
1269 } // end of k0 loop
1270 } // end of k1 loop
1271 } // end of k2 loop
1272 } // end of parallel section
1273 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1274 out.requireWrite();
1275 #pragma omp parallel
1276 {
1277 vector<double> f_000(numComp);
1278 vector<double> f_001(numComp);
1279 vector<double> f_010(numComp);
1280 vector<double> f_011(numComp);
1281 vector<double> f_100(numComp);
1282 vector<double> f_101(numComp);
1283 vector<double> f_110(numComp);
1284 vector<double> f_111(numComp);
1285 if (m_faceOffset[0] > -1) {
1286 #pragma omp for nowait
1287 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1288 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1289 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1298 for (index_t i=0; i < numComp; ++i) {
1299 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1300 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1301 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1302 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1303 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304 o[INDEX3(i,1,0,numComp,3)] = V0;
1305 o[INDEX3(i,2,0,numComp,3)] = V2;
1306 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1307 o[INDEX3(i,1,1,numComp,3)] = V0;
1308 o[INDEX3(i,2,1,numComp,3)] = V3;
1309 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1310 o[INDEX3(i,1,2,numComp,3)] = V1;
1311 o[INDEX3(i,2,2,numComp,3)] = V2;
1312 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1313 o[INDEX3(i,1,3,numComp,3)] = V1;
1314 o[INDEX3(i,2,3,numComp,3)] = V3;
1315 } // end of component loop i
1316 } // end of k1 loop
1317 } // end of k2 loop
1318 } // end of face 0
1319 if (m_faceOffset[1] > -1) {
1320 #pragma omp for nowait
1321 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1322 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1323 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1332 for (index_t i=0; i < numComp; ++i) {
1333 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1334 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1335 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1336 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1337 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1338 o[INDEX3(i,1,0,numComp,3)] = V0;
1339 o[INDEX3(i,2,0,numComp,3)] = V2;
1340 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1341 o[INDEX3(i,1,1,numComp,3)] = V0;
1342 o[INDEX3(i,2,1,numComp,3)] = V3;
1343 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1344 o[INDEX3(i,1,2,numComp,3)] = V1;
1345 o[INDEX3(i,2,2,numComp,3)] = V2;
1346 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1347 o[INDEX3(i,1,3,numComp,3)] = V1;
1348 o[INDEX3(i,2,3,numComp,3)] = V3;
1349 } // end of component loop i
1350 } // end of k1 loop
1351 } // end of k2 loop
1352 } // end of face 1
1353 if (m_faceOffset[2] > -1) {
1354 #pragma omp for nowait
1355 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1356 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1357 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1366 for (index_t i=0; i < numComp; ++i) {
1367 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1368 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1369 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1370 o[INDEX3(i,0,0,numComp,3)] = V0;
1371 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1372 o[INDEX3(i,2,0,numComp,3)] = V1;
1373 o[INDEX3(i,0,1,numComp,3)] = V0;
1374 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1375 o[INDEX3(i,2,1,numComp,3)] = V2;
1376 o[INDEX3(i,0,2,numComp,3)] = V0;
1377 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1378 o[INDEX3(i,2,2,numComp,3)] = V1;
1379 o[INDEX3(i,0,3,numComp,3)] = V0;
1380 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1381 o[INDEX3(i,2,3,numComp,3)] = V2;
1382 } // end of component loop i
1383 } // end of k0 loop
1384 } // end of k2 loop
1385 } // end of face 2
1386 if (m_faceOffset[3] > -1) {
1387 #pragma omp for nowait
1388 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1389 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1390 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1399 for (index_t i=0; i < numComp; ++i) {
1400 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1401 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1402 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1403 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1404 o[INDEX3(i,0,0,numComp,3)] = V0;
1405 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1406 o[INDEX3(i,2,0,numComp,3)] = V2;
1407 o[INDEX3(i,0,1,numComp,3)] = V0;
1408 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1409 o[INDEX3(i,2,1,numComp,3)] = V3;
1410 o[INDEX3(i,0,2,numComp,3)] = V1;
1411 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1412 o[INDEX3(i,2,2,numComp,3)] = V2;
1413 o[INDEX3(i,0,3,numComp,3)] = V1;
1414 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1415 o[INDEX3(i,2,3,numComp,3)] = V3;
1416 } // end of component loop i
1417 } // end of k0 loop
1418 } // end of k2 loop
1419 } // end of face 3
1420 if (m_faceOffset[4] > -1) {
1421 #pragma omp for nowait
1422 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1423 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1424 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1433 for (index_t i=0; i < numComp; ++i) {
1434 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1435 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1436 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1437 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1438 o[INDEX3(i,0,0,numComp,3)] = V0;
1439 o[INDEX3(i,1,0,numComp,3)] = V2;
1440 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1441 o[INDEX3(i,0,1,numComp,3)] = V0;
1442 o[INDEX3(i,1,1,numComp,3)] = V3;
1443 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1444 o[INDEX3(i,0,2,numComp,3)] = V1;
1445 o[INDEX3(i,1,2,numComp,3)] = V2;
1446 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1447 o[INDEX3(i,0,3,numComp,3)] = V1;
1448 o[INDEX3(i,1,3,numComp,3)] = V3;
1449 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1450 } // end of component loop i
1451 } // end of k0 loop
1452 } // end of k1 loop
1453 } // end of face 4
1454 if (m_faceOffset[5] > -1) {
1455 #pragma omp for nowait
1456 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1457 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1458 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1459 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1467 for (index_t i=0; i < numComp; ++i) {
1468 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1469 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1470 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1471 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1472 o[INDEX3(i,0,0,numComp,3)] = V0;
1473 o[INDEX3(i,1,0,numComp,3)] = V2;
1474 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1475 o[INDEX3(i,0,1,numComp,3)] = V0;
1476 o[INDEX3(i,1,1,numComp,3)] = V3;
1477 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1478 o[INDEX3(i,0,2,numComp,3)] = V1;
1479 o[INDEX3(i,1,2,numComp,3)] = V2;
1480 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1481 o[INDEX3(i,0,3,numComp,3)] = V1;
1482 o[INDEX3(i,1,3,numComp,3)] = V3;
1483 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1484 } // end of component loop i
1485 } // end of k0 loop
1486 } // end of k1 loop
1487 } // end of face 5
1488 } // end of parallel section
1489 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1490 out.requireWrite();
1491 #pragma omp parallel
1492 {
1493 vector<double> f_000(numComp);
1494 vector<double> f_001(numComp);
1495 vector<double> f_010(numComp);
1496 vector<double> f_011(numComp);
1497 vector<double> f_100(numComp);
1498 vector<double> f_101(numComp);
1499 vector<double> f_110(numComp);
1500 vector<double> f_111(numComp);
1501 if (m_faceOffset[0] > -1) {
1502 #pragma omp for nowait
1503 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1504 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1505 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1514 for (index_t i=0; i < numComp; ++i) {
1515 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1516 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1517 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1518 } // end of component loop i
1519 } // end of k1 loop
1520 } // end of k2 loop
1521 } // end of face 0
1522 if (m_faceOffset[1] > -1) {
1523 #pragma omp for nowait
1524 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1525 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1526 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1527 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1535 for (index_t i=0; i < numComp; ++i) {
1536 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1537 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1538 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1539 } // end of component loop i
1540 } // end of k1 loop
1541 } // end of k2 loop
1542 } // end of face 1
1543 if (m_faceOffset[2] > -1) {
1544 #pragma omp for nowait
1545 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1546 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1547 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1549 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1550 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1551 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1552 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1553 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1554 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1555 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1556 for (index_t i=0; i < numComp; ++i) {
1557 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1558 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1559 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1560 } // end of component loop i
1561 } // end of k0 loop
1562 } // end of k2 loop
1563 } // end of face 2
1564 if (m_faceOffset[3] > -1) {
1565 #pragma omp for nowait
1566 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1567 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1568 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1577 for (index_t i=0; i < numComp; ++i) {
1578 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1579 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1580 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1581 } // end of component loop i
1582 } // end of k0 loop
1583 } // end of k2 loop
1584 } // end of face 3
1585 if (m_faceOffset[4] > -1) {
1586 #pragma omp for nowait
1587 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1588 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1589 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1590 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1591 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1592 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1593 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1594 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1595 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1596 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1597 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1598 for (index_t i=0; i < numComp; ++i) {
1599 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1600 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1601 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1602 } // end of component loop i
1603 } // end of k0 loop
1604 } // end of k1 loop
1605 } // end of face 4
1606 if (m_faceOffset[5] > -1) {
1607 #pragma omp for nowait
1608 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1609 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1610 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1619 for (index_t i=0; i < numComp; ++i) {
1620 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1621 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1622 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1623 } // end of component loop i
1624 } // end of k0 loop
1625 } // end of k1 loop
1626 } // end of face 5
1627 } // end of parallel section
1628 }
1629 }
1630
1631 //protected
1632 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1633 {
1634 const dim_t numComp = arg.getDataPointSize();
1635 const index_t left = (m_offset[0]==0 ? 0 : 1);
1636 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1637 const index_t front = (m_offset[2]==0 ? 0 : 1);
1638 const int fs = arg.getFunctionSpace().getTypeCode();
1639 if (fs == Elements && arg.actsExpanded()) {
1640 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1641 #pragma omp parallel
1642 {
1643 vector<double> int_local(numComp, 0);
1644 #pragma omp for nowait
1645 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1646 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1647 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1648 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1649 for (index_t i=0; i < numComp; ++i) {
1650 const double f_0 = f[INDEX2(i,0,numComp)];
1651 const double f_1 = f[INDEX2(i,1,numComp)];
1652 const double f_2 = f[INDEX2(i,2,numComp)];
1653 const double f_3 = f[INDEX2(i,3,numComp)];
1654 const double f_4 = f[INDEX2(i,4,numComp)];
1655 const double f_5 = f[INDEX2(i,5,numComp)];
1656 const double f_6 = f[INDEX2(i,6,numComp)];
1657 const double f_7 = f[INDEX2(i,7,numComp)];
1658 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1659 } // end of component loop i
1660 } // end of k0 loop
1661 } // end of k1 loop
1662 } // end of k2 loop
1663
1664 #pragma omp critical
1665 for (index_t i=0; i<numComp; i++)
1666 integrals[i]+=int_local[i];
1667 } // end of parallel section
1668
1669 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1670 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1671 #pragma omp parallel
1672 {
1673 vector<double> int_local(numComp, 0);
1674 #pragma omp for nowait
1675 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1676 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1677 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1678 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1679 for (index_t i=0; i < numComp; ++i) {
1680 int_local[i]+=f[i]*w_0;
1681 } // end of component loop i
1682 } // end of k0 loop
1683 } // end of k1 loop
1684 } // end of k2 loop
1685
1686 #pragma omp critical
1687 for (index_t i=0; i<numComp; i++)
1688 integrals[i]+=int_local[i];
1689 } // end of parallel section
1690
1691 } else if (fs == FaceElements && arg.actsExpanded()) {
1692 const double w_0 = m_dx[1]*m_dx[2]/4.;
1693 const double w_1 = m_dx[0]*m_dx[2]/4.;
1694 const double w_2 = m_dx[0]*m_dx[1]/4.;
1695 #pragma omp parallel
1696 {
1697 vector<double> int_local(numComp, 0);
1698 if (m_faceOffset[0] > -1) {
1699 #pragma omp for nowait
1700 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1701 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1702 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1703 for (index_t i=0; i < numComp; ++i) {
1704 const double f_0 = f[INDEX2(i,0,numComp)];
1705 const double f_1 = f[INDEX2(i,1,numComp)];
1706 const double f_2 = f[INDEX2(i,2,numComp)];
1707 const double f_3 = f[INDEX2(i,3,numComp)];
1708 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1709 } // end of component loop i
1710 } // end of k1 loop
1711 } // end of k2 loop
1712 }
1713
1714 if (m_faceOffset[1] > -1) {
1715 #pragma omp for nowait
1716 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1717 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1718 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1719 for (index_t i=0; i < numComp; ++i) {
1720 const double f_0 = f[INDEX2(i,0,numComp)];
1721 const double f_1 = f[INDEX2(i,1,numComp)];
1722 const double f_2 = f[INDEX2(i,2,numComp)];
1723 const double f_3 = f[INDEX2(i,3,numComp)];
1724 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1725 } // end of component loop i
1726 } // end of k1 loop
1727 } // end of k2 loop
1728 }
1729
1730 if (m_faceOffset[2] > -1) {
1731 #pragma omp for nowait
1732 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1733 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1734 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1735 for (index_t i=0; i < numComp; ++i) {
1736 const double f_0 = f[INDEX2(i,0,numComp)];
1737 const double f_1 = f[INDEX2(i,1,numComp)];
1738 const double f_2 = f[INDEX2(i,2,numComp)];
1739 const double f_3 = f[INDEX2(i,3,numComp)];
1740 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1741 } // end of component loop i
1742 } // end of k1 loop
1743 } // end of k2 loop
1744 }
1745
1746 if (m_faceOffset[3] > -1) {
1747 #pragma omp for nowait
1748 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1749 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1750 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1751 for (index_t i=0; i < numComp; ++i) {
1752 const double f_0 = f[INDEX2(i,0,numComp)];
1753 const double f_1 = f[INDEX2(i,1,numComp)];
1754 const double f_2 = f[INDEX2(i,2,numComp)];
1755 const double f_3 = f[INDEX2(i,3,numComp)];
1756 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1757 } // end of component loop i
1758 } // end of k1 loop
1759 } // end of k2 loop
1760 }
1761
1762 if (m_faceOffset[4] > -1) {
1763 #pragma omp for nowait
1764 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1765 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1766 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1767 for (index_t i=0; i < numComp; ++i) {
1768 const double f_0 = f[INDEX2(i,0,numComp)];
1769 const double f_1 = f[INDEX2(i,1,numComp)];
1770 const double f_2 = f[INDEX2(i,2,numComp)];
1771 const double f_3 = f[INDEX2(i,3,numComp)];
1772 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1773 } // end of component loop i
1774 } // end of k1 loop
1775 } // end of k2 loop
1776 }
1777
1778 if (m_faceOffset[5] > -1) {
1779 #pragma omp for nowait
1780 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1781 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1782 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1783 for (index_t i=0; i < numComp; ++i) {
1784 const double f_0 = f[INDEX2(i,0,numComp)];
1785 const double f_1 = f[INDEX2(i,1,numComp)];
1786 const double f_2 = f[INDEX2(i,2,numComp)];
1787 const double f_3 = f[INDEX2(i,3,numComp)];
1788 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1789 } // end of component loop i
1790 } // end of k1 loop
1791 } // end of k2 loop
1792 }
1793
1794 #pragma omp critical
1795 for (index_t i=0; i<numComp; i++)
1796 integrals[i]+=int_local[i];
1797 } // end of parallel section
1798
1799 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1800 const double w_0 = m_dx[1]*m_dx[2];
1801 const double w_1 = m_dx[0]*m_dx[2];
1802 const double w_2 = m_dx[0]*m_dx[1