/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4633 - (show annotations)
Tue Jan 28 01:04:17 2014 UTC (5 years, 1 month ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 147414 byte(s)
Fixed failure for lazy data when interpolating.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Brick.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler3D.h>
20 #include <ripley/WaveAssembler3D.h>
21 #include <boost/scoped_array.hpp>
22
23 #ifdef USE_NETCDF
24 #include <netcdfcpp.h>
25 #endif
26
27 #if USE_SILO
28 #include <silo.h>
29 #ifdef ESYS_MPI
30 #include <pmpio.h>
31 #endif
32 #endif
33
34 #include <iomanip>
35
36 using namespace std;
37 using esysUtils::FileWriter;
38
39 namespace ripley {
40
41 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
42 double x1, double y1, double z1, int d0, int d1, int d2,
43 const std::vector<double>& points, const std::vector<int>& tags,
44 const std::map<std::string, int>& tagnamestonums) :
45 RipleyDomain(3)
46 {
47 // ignore subdivision parameters for serial run
48 if (m_mpiInfo->size == 1) {
49 d0=1;
50 d1=1;
51 d2=1;
52 }
53 bool warn=false;
54 // if number of subdivisions is non-positive, try to subdivide by the same
55 // ratio as the number of elements
56 if (d0<=0 && d1<=0 && d2<=0) {
57 warn=true;
58 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
59 d0=max(1, d0);
60 d1=max(1, (int)(d0*n1/(float)n0));
61 d2=m_mpiInfo->size/(d0*d1);
62 if (d0*d1*d2 != m_mpiInfo->size) {
63 // ratios not the same so leave "smallest" side undivided and try
64 // dividing 2 sides only
65 if (n0>=n1) {
66 if (n1>=n2) {
67 d0=d1=0;
68 d2=1;
69 } else {
70 d0=d2=0;
71 d1=1;
72 }
73 } else {
74 if (n0>=n2) {
75 d0=d1=0;
76 d2=1;
77 } else {
78 d0=1;
79 d1=d2=0;
80 }
81 }
82 }
83 }
84 if (d0<=0 && d1<=0) {
85 warn=true;
86 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
87 d1=m_mpiInfo->size/d0;
88 if (d0*d1*d2 != m_mpiInfo->size) {
89 // ratios not the same so subdivide side with more elements only
90 if (n0>n1) {
91 d0=0;
92 d1=1;
93 } else {
94 d0=1;
95 d1=0;
96 }
97 }
98 } else if (d0<=0 && d2<=0) {
99 warn=true;
100 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
101 d2=m_mpiInfo->size/d0;
102 if (d0*d1*d2 != m_mpiInfo->size) {
103 // ratios not the same so subdivide side with more elements only
104 if (n0>n2) {
105 d0=0;
106 d2=1;
107 } else {
108 d0=1;
109 d2=0;
110 }
111 }
112 } else if (d1<=0 && d2<=0) {
113 warn=true;
114 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
115 d2=m_mpiInfo->size/d1;
116 if (d0*d1*d2 != m_mpiInfo->size) {
117 // ratios not the same so subdivide side with more elements only
118 if (n1>n2) {
119 d1=0;
120 d2=1;
121 } else {
122 d1=1;
123 d2=0;
124 }
125 }
126 }
127 if (d0<=0) {
128 // d1,d2 are preset, determine d0
129 d0=m_mpiInfo->size/(d1*d2);
130 } else if (d1<=0) {
131 // d0,d2 are preset, determine d1
132 d1=m_mpiInfo->size/(d0*d2);
133 } else if (d2<=0) {
134 // d0,d1 are preset, determine d2
135 d2=m_mpiInfo->size/(d0*d1);
136 }
137
138 // ensure number of subdivisions is valid and nodes can be distributed
139 // among number of ranks
140 if (d0*d1*d2 != m_mpiInfo->size)
141 throw RipleyException("Invalid number of spatial subdivisions");
142
143 if (warn) {
144 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
145 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
146 }
147
148 double l0 = x1-x0;
149 double l1 = y1-y0;
150 double l2 = z1-z0;
151 m_dx[0] = l0/n0;
152 m_dx[1] = l1/n1;
153 m_dx[2] = l2/n2;
154
155 if ((n0+1)%d0 > 0) {
156 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
157 l0=m_dx[0]*n0;
158 cout << "Warning: Adjusted number of elements and length. N0="
159 << n0 << ", l0=" << l0 << endl;
160 }
161 if ((n1+1)%d1 > 0) {
162 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
163 l1=m_dx[1]*n1;
164 cout << "Warning: Adjusted number of elements and length. N1="
165 << n1 << ", l1=" << l1 << endl;
166 }
167 if ((n2+1)%d2 > 0) {
168 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
169 l2=m_dx[2]*n2;
170 cout << "Warning: Adjusted number of elements and length. N2="
171 << n2 << ", l2=" << l2 << endl;
172 }
173
174 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
175 throw RipleyException("Too few elements for the number of ranks");
176
177 m_gNE[0] = n0;
178 m_gNE[1] = n1;
179 m_gNE[2] = n2;
180 m_origin[0] = x0;
181 m_origin[1] = y0;
182 m_origin[2] = z0;
183 m_length[0] = l0;
184 m_length[1] = l1;
185 m_length[2] = l2;
186 m_NX[0] = d0;
187 m_NX[1] = d1;
188 m_NX[2] = d2;
189
190 // local number of elements (including overlap)
191 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
192 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
193 m_NE[0]++;
194 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
195 m_ownNE[0]--;
196
197 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
198 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
199 m_NE[1]++;
200 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
201 m_ownNE[1]--;
202
203 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
204 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
205 m_NE[2]++;
206 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
207 m_ownNE[2]--;
208
209 // local number of nodes
210 m_NN[0] = m_NE[0]+1;
211 m_NN[1] = m_NE[1]+1;
212 m_NN[2] = m_NE[2]+1;
213
214 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
215 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
216 if (m_offset[0] > 0)
217 m_offset[0]--;
218 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
219 if (m_offset[1] > 0)
220 m_offset[1]--;
221 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
222 if (m_offset[2] > 0)
223 m_offset[2]--;
224
225 populateSampleIds();
226 createPattern();
227
228 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
229 for (map<string, int>::const_iterator i = tagnamestonums.begin();
230 i != tagnamestonums.end(); i++) {
231 setTagMap(i->first, i->second);
232 }
233 addPoints(tags.size(), &points[0], &tags[0]);
234 }
235
236
237 Brick::~Brick()
238 {
239 Paso_SystemMatrixPattern_free(m_pattern);
240 Paso_Connector_free(m_connector);
241 delete assembler;
242 }
243
244 string Brick::getDescription() const
245 {
246 return "ripley::Brick";
247 }
248
249 bool Brick::operator==(const AbstractDomain& other) const
250 {
251 const Brick* o=dynamic_cast<const Brick*>(&other);
252 if (o) {
253 return (RipleyDomain::operator==(other) &&
254 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
255 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
256 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
257 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
258 }
259
260 return false;
261 }
262
263 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
264 const ReaderParameters& params) const
265 {
266 #ifdef USE_NETCDF
267 // check destination function space
268 int myN0, myN1, myN2;
269 if (out.getFunctionSpace().getTypeCode() == Nodes) {
270 myN0 = m_NN[0];
271 myN1 = m_NN[1];
272 myN2 = m_NN[2];
273 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
274 out.getFunctionSpace().getTypeCode() == ReducedElements) {
275 myN0 = m_NE[0];
276 myN1 = m_NE[1];
277 myN2 = m_NE[2];
278 } else
279 throw RipleyException("readNcGrid(): invalid function space for output data object");
280
281 if (params.first.size() != 3)
282 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
283
284 if (params.numValues.size() != 3)
285 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
286
287 if (params.multiplier.size() != 3)
288 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
289 for (size_t i=0; i<params.multiplier.size(); i++)
290 if (params.multiplier[i]<1)
291 throw RipleyException("readNcGrid(): all multipliers must be positive");
292
293 // check file existence and size
294 NcFile f(filename.c_str(), NcFile::ReadOnly);
295 if (!f.is_valid())
296 throw RipleyException("readNcGrid(): cannot open file");
297
298 NcVar* var = f.get_var(varname.c_str());
299 if (!var)
300 throw RipleyException("readNcGrid(): invalid variable name");
301
302 // TODO: rank>0 data support
303 const int numComp = out.getDataPointSize();
304 if (numComp > 1)
305 throw RipleyException("readNcGrid(): only scalar data supported");
306
307 const int dims = var->num_dims();
308 boost::scoped_array<long> edges(var->edges());
309
310 // is this a slice of the data object (dims!=3)?
311 // note the expected ordering of edges (as in numpy: z,y,x)
312 if ( (dims==3 && (params.numValues[2] > edges[0] ||
313 params.numValues[1] > edges[1] ||
314 params.numValues[0] > edges[2]))
315 || (dims==2 && params.numValues[2]>1)
316 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
317 throw RipleyException("readNcGrid(): not enough data in file");
318 }
319
320 // check if this rank contributes anything
321 if (params.first[0] >= m_offset[0]+myN0 ||
322 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
323 params.first[1] >= m_offset[1]+myN1 ||
324 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
325 params.first[2] >= m_offset[2]+myN2 ||
326 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
327 return;
328 }
329
330 // now determine how much this rank has to write
331
332 // first coordinates in data object to write to
333 const int first0 = max(0, params.first[0]-m_offset[0]);
334 const int first1 = max(0, params.first[1]-m_offset[1]);
335 const int first2 = max(0, params.first[2]-m_offset[2]);
336 // indices to first value in file (not accounting for reverse yet)
337 int idx0 = max(0, m_offset[0]-params.first[0]);
338 int idx1 = max(0, m_offset[1]-params.first[1]);
339 int idx2 = max(0, m_offset[2]-params.first[2]);
340 // number of values to read
341 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
342 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
343 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
344
345 // make sure we read the right block if going backwards through file
346 if (params.reverse[0])
347 idx0 = edges[dims-1]-num0-idx0;
348 if (dims>1 && params.reverse[1])
349 idx1 = edges[dims-2]-num1-idx1;
350 if (dims>2 && params.reverse[2])
351 idx2 = edges[dims-3]-num2-idx2;
352
353
354 vector<double> values(num0*num1*num2);
355 if (dims==3) {
356 var->set_cur(idx2, idx1, idx0);
357 var->get(&values[0], num2, num1, num0);
358 } else if (dims==2) {
359 var->set_cur(idx1, idx0);
360 var->get(&values[0], num1, num0);
361 } else {
362 var->set_cur(idx0);
363 var->get(&values[0], num0);
364 }
365
366 const int dpp = out.getNumDataPointsPerSample();
367 out.requireWrite();
368
369 // helpers for reversing
370 const int x0 = (params.reverse[0] ? num0-1 : 0);
371 const int x_mult = (params.reverse[0] ? -1 : 1);
372 const int y0 = (params.reverse[1] ? num1-1 : 0);
373 const int y_mult = (params.reverse[1] ? -1 : 1);
374 const int z0 = (params.reverse[2] ? num2-1 : 0);
375 const int z_mult = (params.reverse[2] ? -1 : 1);
376
377 for (index_t z=0; z<num2; z++) {
378 for (index_t y=0; y<num1; y++) {
379 #pragma omp parallel for
380 for (index_t x=0; x<num0; x++) {
381 const int baseIndex = first0+x*params.multiplier[0]
382 +(first1+y*params.multiplier[1])*myN0
383 +(first2+z*params.multiplier[2])*myN0*myN1;
384 const int srcIndex=(z0+z_mult*z)*num1*num0
385 +(y0+y_mult*y)*num0
386 +(x0+x_mult*x);
387 if (!isnan(values[srcIndex])) {
388 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
389 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
390 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
391 const int dataIndex = baseIndex+m0
392 +m1*myN0
393 +m2*myN0*myN1;
394 double* dest = out.getSampleDataRW(dataIndex);
395 for (index_t q=0; q<dpp; q++) {
396 *dest++ = values[srcIndex];
397 }
398 }
399 }
400 }
401 }
402 }
403 }
404 }
405 #else
406 throw RipleyException("readNcGrid(): not compiled with netCDF support");
407 #endif
408 }
409
410 void Brick::readBinaryGrid(escript::Data& out, string filename,
411 const ReaderParameters& params) const
412 {
413 // the mapping is not universally correct but should work on our
414 // supported platforms
415 switch (params.dataType) {
416 case DATATYPE_INT32:
417 readBinaryGridImpl<int>(out, filename, params);
418 break;
419 case DATATYPE_FLOAT32:
420 readBinaryGridImpl<float>(out, filename, params);
421 break;
422 case DATATYPE_FLOAT64:
423 readBinaryGridImpl<double>(out, filename, params);
424 break;
425 default:
426 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
427 }
428 }
429
430 template<typename ValueType>
431 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
432 const ReaderParameters& params) const
433 {
434 // check destination function space
435 int myN0, myN1, myN2;
436 if (out.getFunctionSpace().getTypeCode() == Nodes) {
437 myN0 = m_NN[0];
438 myN1 = m_NN[1];
439 myN2 = m_NN[2];
440 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
441 out.getFunctionSpace().getTypeCode() == ReducedElements) {
442 myN0 = m_NE[0];
443 myN1 = m_NE[1];
444 myN2 = m_NE[2];
445 } else
446 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
447
448 if (params.first.size() != 3)
449 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
450
451 if (params.numValues.size() != 3)
452 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
453
454 if (params.multiplier.size() != 3)
455 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
456 for (size_t i=0; i<params.multiplier.size(); i++)
457 if (params.multiplier[i]<1)
458 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
459
460 // check file existence and size
461 ifstream f(filename.c_str(), ifstream::binary);
462 if (f.fail()) {
463 throw RipleyException("readBinaryGrid(): cannot open file");
464 }
465 f.seekg(0, ios::end);
466 const int numComp = out.getDataPointSize();
467 const int filesize = f.tellg();
468 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
469 if (filesize < reqsize) {
470 f.close();
471 throw RipleyException("readBinaryGrid(): not enough data in file");
472 }
473
474 // check if this rank contributes anything
475 if (params.first[0] >= m_offset[0]+myN0 ||
476 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
477 params.first[1] >= m_offset[1]+myN1 ||
478 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
479 params.first[2] >= m_offset[2]+myN2 ||
480 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
481 f.close();
482 return;
483 }
484
485 // now determine how much this rank has to write
486
487 // first coordinates in data object to write to
488 const int first0 = max(0, params.first[0]-m_offset[0]);
489 const int first1 = max(0, params.first[1]-m_offset[1]);
490 const int first2 = max(0, params.first[2]-m_offset[2]);
491 // indices to first value in file
492 const int idx0 = max(0, m_offset[0]-params.first[0]);
493 const int idx1 = max(0, m_offset[1]-params.first[1]);
494 const int idx2 = max(0, m_offset[2]-params.first[2]);
495 // number of values to read
496 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
497 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
498 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
499
500 out.requireWrite();
501 vector<ValueType> values(num0*numComp);
502 const int dpp = out.getNumDataPointsPerSample();
503
504 for (int z=0; z<num2; z++) {
505 for (int y=0; y<num1; y++) {
506 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
507 +(idx2+z)*params.numValues[0]*params.numValues[1]);
508 f.seekg(fileofs*sizeof(ValueType));
509 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
510
511 for (int x=0; x<num0; x++) {
512 const int baseIndex = first0+x*params.multiplier[0]
513 +(first1+y*params.multiplier[1])*myN0
514 +(first2+z*params.multiplier[2])*myN0*myN1;
515 for (int m2=0; m2<params.multiplier[2]; m2++) {
516 for (int m1=0; m1<params.multiplier[1]; m1++) {
517 for (int m0=0; m0<params.multiplier[0]; m0++) {
518 const int dataIndex = baseIndex+m0
519 +m1*myN0
520 +m2*myN0*myN1;
521 double* dest = out.getSampleDataRW(dataIndex);
522 for (int c=0; c<numComp; c++) {
523 ValueType val = values[x*numComp+c];
524
525 if (params.byteOrder != BYTEORDER_NATIVE) {
526 char* cval = reinterpret_cast<char*>(&val);
527 // this will alter val!!
528 byte_swap32(cval);
529 }
530 if (!std::isnan(val)) {
531 for (int q=0; q<dpp; q++) {
532 *dest++ = static_cast<double>(val);
533 }
534 }
535 }
536 }
537 }
538 }
539 }
540 }
541 }
542
543 f.close();
544 }
545
546 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
547 int byteOrder, int dataType) const
548 {
549 // the mapping is not universally correct but should work on our
550 // supported platforms
551 switch (dataType) {
552 case DATATYPE_INT32:
553 writeBinaryGridImpl<int>(in, filename, byteOrder);
554 break;
555 case DATATYPE_FLOAT32:
556 writeBinaryGridImpl<float>(in, filename, byteOrder);
557 break;
558 case DATATYPE_FLOAT64:
559 writeBinaryGridImpl<double>(in, filename, byteOrder);
560 break;
561 default:
562 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
563 }
564 }
565
566 template<typename ValueType>
567 void Brick::writeBinaryGridImpl(const escript::Data& in,
568 const string& filename, int byteOrder) const
569 {
570 // check function space and determine number of points
571 int myN0, myN1, myN2;
572 int totalN0, totalN1, totalN2;
573 if (in.getFunctionSpace().getTypeCode() == Nodes) {
574 myN0 = m_NN[0];
575 myN1 = m_NN[1];
576 myN2 = m_NN[2];
577 totalN0 = m_gNE[0]+1;
578 totalN1 = m_gNE[1]+1;
579 totalN2 = m_gNE[2]+1;
580 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
581 in.getFunctionSpace().getTypeCode() == ReducedElements) {
582 myN0 = m_NE[0];
583 myN1 = m_NE[1];
584 myN2 = m_NE[2];
585 totalN0 = m_gNE[0];
586 totalN1 = m_gNE[1];
587 totalN2 = m_gNE[2];
588 } else
589 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
590
591 const int numComp = in.getDataPointSize();
592 const int dpp = in.getNumDataPointsPerSample();
593 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
594
595 if (numComp > 1 || dpp > 1)
596 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
597
598 // from here on we know that each sample consists of one value
599 FileWriter fw;
600 fw.openFile(filename, fileSize);
601 MPIBarrier();
602
603 for (index_t z=0; z<myN2; z++) {
604 for (index_t y=0; y<myN1; y++) {
605 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
606 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
607 ostringstream oss;
608
609 for (index_t x=0; x<myN0; x++) {
610 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
611 ValueType fvalue = static_cast<ValueType>(*sample);
612 if (byteOrder == BYTEORDER_NATIVE) {
613 oss.write((char*)&fvalue, sizeof(fvalue));
614 } else {
615 char* value = reinterpret_cast<char*>(&fvalue);
616 oss.write(byte_swap32(value), sizeof(fvalue));
617 }
618 }
619 fw.writeAt(oss, fileofs);
620 }
621 }
622 fw.close();
623 }
624
625 void Brick::dump(const string& fileName) const
626 {
627 #if USE_SILO
628 string fn(fileName);
629 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
630 fn+=".silo";
631 }
632
633 int driver=DB_HDF5;
634 string siloPath;
635 DBfile* dbfile = NULL;
636
637 #ifdef ESYS_MPI
638 PMPIO_baton_t* baton = NULL;
639 const int NUM_SILO_FILES = 1;
640 const char* blockDirFmt = "/block%04d";
641 #endif
642
643 if (m_mpiInfo->size > 1) {
644 #ifdef ESYS_MPI
645 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
646 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
647 PMPIO_DefaultClose, (void*)&driver);
648 // try the fallback driver in case of error
649 if (!baton && driver != DB_PDB) {
650 driver = DB_PDB;
651 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
652 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
653 PMPIO_DefaultClose, (void*)&driver);
654 }
655 if (baton) {
656 char str[64];
657 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
658 siloPath = str;
659 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
660 }
661 #endif
662 } else {
663 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
664 getDescription().c_str(), driver);
665 // try the fallback driver in case of error
666 if (!dbfile && driver != DB_PDB) {
667 driver = DB_PDB;
668 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669 getDescription().c_str(), driver);
670 }
671 }
672
673 if (!dbfile)
674 throw RipleyException("dump: Could not create Silo file");
675
676 /*
677 if (driver==DB_HDF5) {
678 // gzip level 1 already provides good compression with minimal
679 // performance penalty. Some tests showed that gzip levels >3 performed
680 // rather badly on escript data both in terms of time and space
681 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
682 }
683 */
684
685 boost::scoped_ptr<double> x(new double[m_NN[0]]);
686 boost::scoped_ptr<double> y(new double[m_NN[1]]);
687 boost::scoped_ptr<double> z(new double[m_NN[2]]);
688 double* coords[3] = { x.get(), y.get(), z.get() };
689 #pragma omp parallel
690 {
691 #pragma omp for
692 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
693 coords[0][i0]=getLocalCoordinate(i0, 0);
694 }
695 #pragma omp for
696 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
697 coords[1][i1]=getLocalCoordinate(i1, 1);
698 }
699 #pragma omp for
700 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
701 coords[2][i2]=getLocalCoordinate(i2, 2);
702 }
703 }
704 int* dims = const_cast<int*>(getNumNodesPerDim());
705
706 // write mesh
707 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
708 DB_COLLINEAR, NULL);
709
710 // write node ids
711 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
712 NULL, 0, DB_INT, DB_NODECENT, NULL);
713
714 // write element ids
715 dims = const_cast<int*>(getNumElementsPerDim());
716 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
717 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
718
719 // rank 0 writes multimesh and multivar
720 if (m_mpiInfo->rank == 0) {
721 vector<string> tempstrings;
722 vector<char*> names;
723 for (dim_t i=0; i<m_mpiInfo->size; i++) {
724 stringstream path;
725 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
726 tempstrings.push_back(path.str());
727 names.push_back((char*)tempstrings.back().c_str());
728 }
729 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
730 DBSetDir(dbfile, "/");
731 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
732 &types[0], NULL);
733 tempstrings.clear();
734 names.clear();
735 for (dim_t i=0; i<m_mpiInfo->size; i++) {
736 stringstream path;
737 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
738 tempstrings.push_back(path.str());
739 names.push_back((char*)tempstrings.back().c_str());
740 }
741 types.assign(m_mpiInfo->size, DB_QUADVAR);
742 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
743 &types[0], NULL);
744 tempstrings.clear();
745 names.clear();
746 for (dim_t i=0; i<m_mpiInfo->size; i++) {
747 stringstream path;
748 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
749 tempstrings.push_back(path.str());
750 names.push_back((char*)tempstrings.back().c_str());
751 }
752 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
753 &types[0], NULL);
754 }
755
756 if (m_mpiInfo->size > 1) {
757 #ifdef ESYS_MPI
758 PMPIO_HandOffBaton(baton, dbfile);
759 PMPIO_Finish(baton);
760 #endif
761 } else {
762 DBClose(dbfile);
763 }
764
765 #else // USE_SILO
766 throw RipleyException("dump: no Silo support");
767 #endif
768 }
769
770 const int* Brick::borrowSampleReferenceIDs(int fsType) const
771 {
772 switch (fsType) {
773 case Nodes:
774 case ReducedNodes: //FIXME: reduced
775 return &m_nodeId[0];
776 case DegreesOfFreedom:
777 case ReducedDegreesOfFreedom: //FIXME: reduced
778 return &m_dofId[0];
779 case Elements:
780 case ReducedElements:
781 return &m_elementId[0];
782 case FaceElements:
783 case ReducedFaceElements:
784 return &m_faceId[0];
785 default:
786 break;
787 }
788
789 stringstream msg;
790 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
791 throw RipleyException(msg.str());
792 }
793
794 bool Brick::ownSample(int fsType, index_t id) const
795 {
796 if (getMPISize()==1)
797 return true;
798
799 switch (fsType) {
800 case Nodes:
801 case ReducedNodes: //FIXME: reduced
802 return (m_dofMap[id] < getNumDOF());
803 case DegreesOfFreedom:
804 case ReducedDegreesOfFreedom:
805 return true;
806 case Elements:
807 case ReducedElements:
808 {
809 // check ownership of element's _last_ node
810 const index_t x=id%m_NE[0] + 1;
811 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
812 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
813 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
814 }
815 case FaceElements:
816 case ReducedFaceElements:
817 {
818 // check ownership of face element's last node
819 dim_t n=0;
820 for (size_t i=0; i<6; i++) {
821 n+=m_faceCount[i];
822 if (id<n) {
823 const index_t j=id-n+m_faceCount[i];
824 if (i>=4) { // front or back
825 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
826 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
827 } else if (i>=2) { // bottom or top
828 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
829 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
830 } else { // left or right
831 const index_t first=(i==0 ? 0 : m_NN[0]-1);
832 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
833 }
834 }
835 }
836 return false;
837 }
838 default:
839 break;
840 }
841
842 stringstream msg;
843 msg << "ownSample: invalid function space type " << fsType;
844 throw RipleyException(msg.str());
845 }
846
847 void Brick::setToNormal(escript::Data& out) const
848 {
849 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
850 out.requireWrite();
851 #pragma omp parallel
852 {
853 if (m_faceOffset[0] > -1) {
854 #pragma omp for nowait
855 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
856 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
857 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
858 // set vector at four quadrature points
859 *o++ = -1.; *o++ = 0.; *o++ = 0.;
860 *o++ = -1.; *o++ = 0.; *o++ = 0.;
861 *o++ = -1.; *o++ = 0.; *o++ = 0.;
862 *o++ = -1.; *o++ = 0.; *o = 0.;
863 }
864 }
865 }
866
867 if (m_faceOffset[1] > -1) {
868 #pragma omp for nowait
869 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
870 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
871 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
872 // set vector at four quadrature points
873 *o++ = 1.; *o++ = 0.; *o++ = 0.;
874 *o++ = 1.; *o++ = 0.; *o++ = 0.;
875 *o++ = 1.; *o++ = 0.; *o++ = 0.;
876 *o++ = 1.; *o++ = 0.; *o = 0.;
877 }
878 }
879 }
880
881 if (m_faceOffset[2] > -1) {
882 #pragma omp for nowait
883 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
884 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
885 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
886 // set vector at four quadrature points
887 *o++ = 0.; *o++ = -1.; *o++ = 0.;
888 *o++ = 0.; *o++ = -1.; *o++ = 0.;
889 *o++ = 0.; *o++ = -1.; *o++ = 0.;
890 *o++ = 0.; *o++ = -1.; *o = 0.;
891 }
892 }
893 }
894
895 if (m_faceOffset[3] > -1) {
896 #pragma omp for nowait
897 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
898 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
899 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
900 // set vector at four quadrature points
901 *o++ = 0.; *o++ = 1.; *o++ = 0.;
902 *o++ = 0.; *o++ = 1.; *o++ = 0.;
903 *o++ = 0.; *o++ = 1.; *o++ = 0.;
904 *o++ = 0.; *o++ = 1.; *o = 0.;
905 }
906 }
907 }
908
909 if (m_faceOffset[4] > -1) {
910 #pragma omp for nowait
911 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
912 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
913 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
914 // set vector at four quadrature points
915 *o++ = 0.; *o++ = 0.; *o++ = -1.;
916 *o++ = 0.; *o++ = 0.; *o++ = -1.;
917 *o++ = 0.; *o++ = 0.; *o++ = -1.;
918 *o++ = 0.; *o++ = 0.; *o = -1.;
919 }
920 }
921 }
922
923 if (m_faceOffset[5] > -1) {
924 #pragma omp for nowait
925 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
926 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
927 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
928 // set vector at four quadrature points
929 *o++ = 0.; *o++ = 0.; *o++ = 1.;
930 *o++ = 0.; *o++ = 0.; *o++ = 1.;
931 *o++ = 0.; *o++ = 0.; *o++ = 1.;
932 *o++ = 0.; *o++ = 0.; *o = 1.;
933 }
934 }
935 }
936 } // end of parallel section
937 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
938 out.requireWrite();
939 #pragma omp parallel
940 {
941 if (m_faceOffset[0] > -1) {
942 #pragma omp for nowait
943 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
944 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
945 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
946 *o++ = -1.;
947 *o++ = 0.;
948 *o = 0.;
949 }
950 }
951 }
952
953 if (m_faceOffset[1] > -1) {
954 #pragma omp for nowait
955 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
956 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
957 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
958 *o++ = 1.;
959 *o++ = 0.;
960 *o = 0.;
961 }
962 }
963 }
964
965 if (m_faceOffset[2] > -1) {
966 #pragma omp for nowait
967 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
968 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
969 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
970 *o++ = 0.;
971 *o++ = -1.;
972 *o = 0.;
973 }
974 }
975 }
976
977 if (m_faceOffset[3] > -1) {
978 #pragma omp for nowait
979 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
980 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
981 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
982 *o++ = 0.;
983 *o++ = 1.;
984 *o = 0.;
985 }
986 }
987 }
988
989 if (m_faceOffset[4] > -1) {
990 #pragma omp for nowait
991 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
992 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
993 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
994 *o++ = 0.;
995 *o++ = 0.;
996 *o = -1.;
997 }
998 }
999 }
1000
1001 if (m_faceOffset[5] > -1) {
1002 #pragma omp for nowait
1003 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1004 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1005 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1006 *o++ = 0.;
1007 *o++ = 0.;
1008 *o = 1.;
1009 }
1010 }
1011 }
1012 } // end of parallel section
1013
1014 } else {
1015 stringstream msg;
1016 msg << "setToNormal: invalid function space type "
1017 << out.getFunctionSpace().getTypeCode();
1018 throw RipleyException(msg.str());
1019 }
1020 }
1021
1022 void Brick::setToSize(escript::Data& out) const
1023 {
1024 if (out.getFunctionSpace().getTypeCode() == Elements
1025 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1026 out.requireWrite();
1027 const dim_t numQuad=out.getNumDataPointsPerSample();
1028 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1029 #pragma omp parallel for
1030 for (index_t k = 0; k < getNumElements(); ++k) {
1031 double* o = out.getSampleDataRW(k);
1032 fill(o, o+numQuad, size);
1033 }
1034 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1035 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1036 out.requireWrite();
1037 const dim_t numQuad=out.getNumDataPointsPerSample();
1038 #pragma omp parallel
1039 {
1040 if (m_faceOffset[0] > -1) {
1041 const double size=min(m_dx[1],m_dx[2]);
1042 #pragma omp for nowait
1043 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1044 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1045 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1046 fill(o, o+numQuad, size);
1047 }
1048 }
1049 }
1050
1051 if (m_faceOffset[1] > -1) {
1052 const double size=min(m_dx[1],m_dx[2]);
1053 #pragma omp for nowait
1054 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1055 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1056 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1057 fill(o, o+numQuad, size);
1058 }
1059 }
1060 }
1061
1062 if (m_faceOffset[2] > -1) {
1063 const double size=min(m_dx[0],m_dx[2]);
1064 #pragma omp for nowait
1065 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1066 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1067 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1068 fill(o, o+numQuad, size);
1069 }
1070 }
1071 }
1072
1073 if (m_faceOffset[3] > -1) {
1074 const double size=min(m_dx[0],m_dx[2]);
1075 #pragma omp for nowait
1076 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1077 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1078 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1079 fill(o, o+numQuad, size);
1080 }
1081 }
1082 }
1083
1084 if (m_faceOffset[4] > -1) {
1085 const double size=min(m_dx[0],m_dx[1]);
1086 #pragma omp for nowait
1087 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1088 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1089 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1090 fill(o, o+numQuad, size);
1091 }
1092 }
1093 }
1094
1095 if (m_faceOffset[5] > -1) {
1096 const double size=min(m_dx[0],m_dx[1]);
1097 #pragma omp for nowait
1098 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1099 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1100 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1101 fill(o, o+numQuad, size);
1102 }
1103 }
1104 }
1105 } // end of parallel section
1106
1107 } else {
1108 stringstream msg;
1109 msg << "setToSize: invalid function space type "
1110 << out.getFunctionSpace().getTypeCode();
1111 throw RipleyException(msg.str());
1112 }
1113 }
1114
1115 void Brick::Print_Mesh_Info(const bool full) const
1116 {
1117 RipleyDomain::Print_Mesh_Info(full);
1118 if (full) {
1119 cout << " Id Coordinates" << endl;
1120 cout.precision(15);
1121 cout.setf(ios::scientific, ios::floatfield);
1122 for (index_t i=0; i < getNumNodes(); i++) {
1123 cout << " " << setw(5) << m_nodeId[i]
1124 << " " << getLocalCoordinate(i%m_NN[0], 0)
1125 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1126 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1127 }
1128 }
1129 }
1130
1131
1132 //protected
1133 void Brick::assembleCoordinates(escript::Data& arg) const
1134 {
1135 escriptDataC x = arg.getDataC();
1136 int numDim = m_numDim;
1137 if (!isDataPointShapeEqual(&x, 1, &numDim))
1138 throw RipleyException("setToX: Invalid Data object shape");
1139 if (!numSamplesEqual(&x, 1, getNumNodes()))
1140 throw RipleyException("setToX: Illegal number of samples in Data object");
1141
1142 arg.requireWrite();
1143 #pragma omp parallel for
1144 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1145 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1146 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1147 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1148 point[0] = getLocalCoordinate(i0, 0);
1149 point[1] = getLocalCoordinate(i1, 1);
1150 point[2] = getLocalCoordinate(i2, 2);
1151 }
1152 }
1153 }
1154 }
1155
1156 //protected
1157 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1158 {
1159 const dim_t numComp = in.getDataPointSize();
1160 const double C0 = .044658198738520451079;
1161 const double C1 = .16666666666666666667;
1162 const double C2 = .21132486540518711775;
1163 const double C3 = .25;
1164 const double C4 = .5;
1165 const double C5 = .62200846792814621559;
1166 const double C6 = .78867513459481288225;
1167
1168 if (out.getFunctionSpace().getTypeCode() == Elements) {
1169 out.requireWrite();
1170 #pragma omp parallel
1171 {
1172 vector<double> f_000(numComp);
1173 vector<double> f_001(numComp);
1174 vector<double> f_010(numComp);
1175 vector<double> f_011(numComp);
1176 vector<double> f_100(numComp);
1177 vector<double> f_101(numComp);
1178 vector<double> f_110(numComp);
1179 vector<double> f_111(numComp);
1180 #pragma omp for
1181 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1182 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1183 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1184 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1185 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1186 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1187 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1188 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1189 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1193 for (index_t i=0; i < numComp; ++i) {
1194 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1195 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1196 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1197 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1198 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1199 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1200 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1201 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1202 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1203 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1204 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1205 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1206 o[INDEX3(i,0,0,numComp,3)] = V0;
1207 o[INDEX3(i,1,0,numComp,3)] = V4;
1208 o[INDEX3(i,2,0,numComp,3)] = V8;
1209 o[INDEX3(i,0,1,numComp,3)] = V0;
1210 o[INDEX3(i,1,1,numComp,3)] = V5;
1211 o[INDEX3(i,2,1,numComp,3)] = V9;
1212 o[INDEX3(i,0,2,numComp,3)] = V1;
1213 o[INDEX3(i,1,2,numComp,3)] = V4;
1214 o[INDEX3(i,2,2,numComp,3)] = V10;
1215 o[INDEX3(i,0,3,numComp,3)] = V1;
1216 o[INDEX3(i,1,3,numComp,3)] = V5;
1217 o[INDEX3(i,2,3,numComp,3)] = V11;
1218 o[INDEX3(i,0,4,numComp,3)] = V2;
1219 o[INDEX3(i,1,4,numComp,3)] = V6;
1220 o[INDEX3(i,2,4,numComp,3)] = V8;
1221 o[INDEX3(i,0,5,numComp,3)] = V2;
1222 o[INDEX3(i,1,5,numComp,3)] = V7;
1223 o[INDEX3(i,2,5,numComp,3)] = V9;
1224 o[INDEX3(i,0,6,numComp,3)] = V3;
1225 o[INDEX3(i,1,6,numComp,3)] = V6;
1226 o[INDEX3(i,2,6,numComp,3)] = V10;
1227 o[INDEX3(i,0,7,numComp,3)] = V3;
1228 o[INDEX3(i,1,7,numComp,3)] = V7;
1229 o[INDEX3(i,2,7,numComp,3)] = V11;
1230 } // end of component loop i
1231 } // end of k0 loop
1232 } // end of k1 loop
1233 } // end of k2 loop
1234 } // end of parallel section
1235 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1236 out.requireWrite();
1237 #pragma omp parallel
1238 {
1239 vector<double> f_000(numComp);
1240 vector<double> f_001(numComp);
1241 vector<double> f_010(numComp);
1242 vector<double> f_011(numComp);
1243 vector<double> f_100(numComp);
1244 vector<double> f_101(numComp);
1245 vector<double> f_110(numComp);
1246 vector<double> f_111(numComp);
1247 #pragma omp for
1248 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1249 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1250 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1251 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1252 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1253 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1254 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1255 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1260 for (index_t i=0; i < numComp; ++i) {
1261 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1262 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1263 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1264 } // end of component loop i
1265 } // end of k0 loop
1266 } // end of k1 loop
1267 } // end of k2 loop
1268 } // end of parallel section
1269 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1270 out.requireWrite();
1271 #pragma omp parallel
1272 {
1273 vector<double> f_000(numComp);
1274 vector<double> f_001(numComp);
1275 vector<double> f_010(numComp);
1276 vector<double> f_011(numComp);
1277 vector<double> f_100(numComp);
1278 vector<double> f_101(numComp);
1279 vector<double> f_110(numComp);
1280 vector<double> f_111(numComp);
1281 if (m_faceOffset[0] > -1) {
1282 #pragma omp for nowait
1283 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1284 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1285 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1286 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1287 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1288 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1289 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1294 for (index_t i=0; i < numComp; ++i) {
1295 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1296 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1297 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1298 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1299 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1300 o[INDEX3(i,1,0,numComp,3)] = V0;
1301 o[INDEX3(i,2,0,numComp,3)] = V2;
1302 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1303 o[INDEX3(i,1,1,numComp,3)] = V0;
1304 o[INDEX3(i,2,1,numComp,3)] = V3;
1305 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 o[INDEX3(i,1,2,numComp,3)] = V1;
1307 o[INDEX3(i,2,2,numComp,3)] = V2;
1308 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1309 o[INDEX3(i,1,3,numComp,3)] = V1;
1310 o[INDEX3(i,2,3,numComp,3)] = V3;
1311 } // end of component loop i
1312 } // end of k1 loop
1313 } // end of k2 loop
1314 } // end of face 0
1315 if (m_faceOffset[1] > -1) {
1316 #pragma omp for nowait
1317 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1318 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1319 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1320 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1321 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1322 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1323 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1328 for (index_t i=0; i < numComp; ++i) {
1329 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1330 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1331 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1332 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1333 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1334 o[INDEX3(i,1,0,numComp,3)] = V0;
1335 o[INDEX3(i,2,0,numComp,3)] = V2;
1336 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1337 o[INDEX3(i,1,1,numComp,3)] = V0;
1338 o[INDEX3(i,2,1,numComp,3)] = V3;
1339 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1340 o[INDEX3(i,1,2,numComp,3)] = V1;
1341 o[INDEX3(i,2,2,numComp,3)] = V2;
1342 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1343 o[INDEX3(i,1,3,numComp,3)] = V1;
1344 o[INDEX3(i,2,3,numComp,3)] = V3;
1345 } // end of component loop i
1346 } // end of k1 loop
1347 } // end of k2 loop
1348 } // end of face 1
1349 if (m_faceOffset[2] > -1) {
1350 #pragma omp for nowait
1351 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1352 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1353 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1354 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1355 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1357 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1362 for (index_t i=0; i < numComp; ++i) {
1363 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1364 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1365 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1366 o[INDEX3(i,0,0,numComp,3)] = V0;
1367 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1368 o[INDEX3(i,2,0,numComp,3)] = V1;
1369 o[INDEX3(i,0,1,numComp,3)] = V0;
1370 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1371 o[INDEX3(i,2,1,numComp,3)] = V2;
1372 o[INDEX3(i,0,2,numComp,3)] = V0;
1373 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1374 o[INDEX3(i,2,2,numComp,3)] = V1;
1375 o[INDEX3(i,0,3,numComp,3)] = V0;
1376 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1377 o[INDEX3(i,2,3,numComp,3)] = V2;
1378 } // end of component loop i
1379 } // end of k0 loop
1380 } // end of k2 loop
1381 } // end of face 2
1382 if (m_faceOffset[3] > -1) {
1383 #pragma omp for nowait
1384 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1385 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1386 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1387 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1388 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1389 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1390 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1395 for (index_t i=0; i < numComp; ++i) {
1396 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1397 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1398 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1399 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1400 o[INDEX3(i,0,0,numComp,3)] = V0;
1401 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1402 o[INDEX3(i,2,0,numComp,3)] = V2;
1403 o[INDEX3(i,0,1,numComp,3)] = V0;
1404 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1405 o[INDEX3(i,2,1,numComp,3)] = V3;
1406 o[INDEX3(i,0,2,numComp,3)] = V1;
1407 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1408 o[INDEX3(i,2,2,numComp,3)] = V2;
1409 o[INDEX3(i,0,3,numComp,3)] = V1;
1410 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1411 o[INDEX3(i,2,3,numComp,3)] = V3;
1412 } // end of component loop i
1413 } // end of k0 loop
1414 } // end of k2 loop
1415 } // end of face 3
1416 if (m_faceOffset[4] > -1) {
1417 #pragma omp for nowait
1418 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1419 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1420 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1421 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1422 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1423 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1424 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1429 for (index_t i=0; i < numComp; ++i) {
1430 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1431 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1432 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1433 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1434 o[INDEX3(i,0,0,numComp,3)] = V0;
1435 o[INDEX3(i,1,0,numComp,3)] = V2;
1436 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1437 o[INDEX3(i,0,1,numComp,3)] = V0;
1438 o[INDEX3(i,1,1,numComp,3)] = V3;
1439 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1440 o[INDEX3(i,0,2,numComp,3)] = V1;
1441 o[INDEX3(i,1,2,numComp,3)] = V2;
1442 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1443 o[INDEX3(i,0,3,numComp,3)] = V1;
1444 o[INDEX3(i,1,3,numComp,3)] = V3;
1445 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1446 } // end of component loop i
1447 } // end of k0 loop
1448 } // end of k1 loop
1449 } // end of face 4
1450 if (m_faceOffset[5] > -1) {
1451 #pragma omp for nowait
1452 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1453 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1454 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1455 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1456 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1457 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1458 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1459 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1463 for (index_t i=0; i < numComp; ++i) {
1464 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1465 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1466 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1467 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1468 o[INDEX3(i,0,0,numComp,3)] = V0;
1469 o[INDEX3(i,1,0,numComp,3)] = V2;
1470 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1471 o[INDEX3(i,0,1,numComp,3)] = V0;
1472 o[INDEX3(i,1,1,numComp,3)] = V3;
1473 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1474 o[INDEX3(i,0,2,numComp,3)] = V1;
1475 o[INDEX3(i,1,2,numComp,3)] = V2;
1476 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1477 o[INDEX3(i,0,3,numComp,3)] = V1;
1478 o[INDEX3(i,1,3,numComp,3)] = V3;
1479 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1480 } // end of component loop i
1481 } // end of k0 loop
1482 } // end of k1 loop
1483 } // end of face 5
1484 } // end of parallel section
1485 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1486 out.requireWrite();
1487 #pragma omp parallel
1488 {
1489 vector<double> f_000(numComp);
1490 vector<double> f_001(numComp);
1491 vector<double> f_010(numComp);
1492 vector<double> f_011(numComp);
1493 vector<double> f_100(numComp);
1494 vector<double> f_101(numComp);
1495 vector<double> f_110(numComp);
1496 vector<double> f_111(numComp);
1497 if (m_faceOffset[0] > -1) {
1498 #pragma omp for nowait
1499 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1500 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1501 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1510 for (index_t i=0; i < numComp; ++i) {
1511 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1512 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1513 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1514 } // end of component loop i
1515 } // end of k1 loop
1516 } // end of k2 loop
1517 } // end of face 0
1518 if (m_faceOffset[1] > -1) {
1519 #pragma omp for nowait
1520 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1521 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1522 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1523 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1524 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1525 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1526 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1527 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1531 for (index_t i=0; i < numComp; ++i) {
1532 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1533 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1534 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1535 } // end of component loop i
1536 } // end of k1 loop
1537 } // end of k2 loop
1538 } // end of face 1
1539 if (m_faceOffset[2] > -1) {
1540 #pragma omp for nowait
1541 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1542 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1543 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1549 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1550 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1551 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1552 for (index_t i=0; i < numComp; ++i) {
1553 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1554 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1555 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1556 } // end of component loop i
1557 } // end of k0 loop
1558 } // end of k2 loop
1559 } // end of face 2
1560 if (m_faceOffset[3] > -1) {
1561 #pragma omp for nowait
1562 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1563 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1564 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1573 for (index_t i=0; i < numComp; ++i) {
1574 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1575 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1576 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1577 } // end of component loop i
1578 } // end of k0 loop
1579 } // end of k2 loop
1580 } // end of face 3
1581 if (m_faceOffset[4] > -1) {
1582 #pragma omp for nowait
1583 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1584 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1585 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1586 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1587 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1588 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1589 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1590 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1591 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1592 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1593 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1594 for (index_t i=0; i < numComp; ++i) {
1595 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1596 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1597 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1598 } // end of component loop i
1599 } // end of k0 loop
1600 } // end of k1 loop
1601 } // end of face 4
1602 if (m_faceOffset[5] > -1) {
1603 #pragma omp for nowait
1604 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1605 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1606 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1607 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1608 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1609 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1610 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1615 for (index_t i=0; i < numComp; ++i) {
1616 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1617 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1618 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1619 } // end of component loop i
1620 } // end of k0 loop
1621 } // end of k1 loop
1622 } // end of face 5
1623 } // end of parallel section
1624 }
1625 }
1626
1627 //protected
1628 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1629 {
1630 const dim_t numComp = arg.getDataPointSize();
1631 const index_t left = (m_offset[0]==0 ? 0 : 1);
1632 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1633 const index_t front = (m_offset[2]==0 ? 0 : 1);
1634 const int fs = arg.getFunctionSpace().getTypeCode();
1635 if (fs == Elements && arg.actsExpanded()) {
1636 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1637 #pragma omp parallel
1638 {
1639 vector<double> int_local(numComp, 0);
1640 #pragma omp for nowait
1641 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1642 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1643 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1644 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1645 for (index_t i=0; i < numComp; ++i) {
1646 const double f_0 = f[INDEX2(i,0,numComp)];
1647 const double f_1 = f[INDEX2(i,1,numComp)];
1648 const double f_2 = f[INDEX2(i,2,numComp)];
1649 const double f_3 = f[INDEX2(i,3,numComp)];
1650 const double f_4 = f[INDEX2(i,4,numComp)];
1651 const double f_5 = f[INDEX2(i,5,numComp)];
1652 const double f_6 = f[INDEX2(i,6,numComp)];
1653 const double f_7 = f[INDEX2(i,7,numComp)];
1654 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1655 } // end of component loop i
1656 } // end of k0 loop
1657 } // end of k1 loop
1658 } // end of k2 loop
1659
1660 #pragma omp critical
1661 for (index_t i=0; i<numComp; i++)
1662 integrals[i]+=int_local[i];
1663 } // end of parallel section
1664
1665 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1666 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1667 #pragma omp parallel
1668 {
1669 vector<double> int_local(numComp, 0);
1670 #pragma omp for nowait
1671 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1672 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1673 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1674 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1675 for (index_t i=0; i < numComp; ++i) {
1676 int_local[i]+=f[i]*w_0;
1677 } // end of component loop i
1678 } // end of k0 loop
1679 } // end of k1 loop
1680 } // end of k2 loop
1681
1682 #pragma omp critical
1683 for (index_t i=0; i<numComp; i++)
1684 integrals[i]+=int_local[i];
1685 } // end of parallel section
1686
1687 } else if (fs == FaceElements && arg.actsExpanded()) {
1688 const double w_0 = m_dx[1]*m_dx[2]/4.;
1689 const double w_1 = m_dx[0]*m_dx[2]/4.;
1690 const double w_2 = m_dx[0]*m_dx[1]/4.;
1691 #pragma omp parallel
1692 {
1693 vector<double> int_local(numComp, 0);
1694 if (m_faceOffset[0] > -1) {
1695 #pragma omp for nowait
1696 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1697 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1698 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1699 for (index_t i=0; i < numComp; ++i) {
1700 const double f_0 = f[INDEX2(i,0,numComp)];
1701 const double f_1 = f[INDEX2(i,1,numComp)];
1702 const double f_2 = f[INDEX2(i,2,numComp)];
1703 const double f_3 = f[INDEX2(i,3,numComp)];
1704 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1705 } // end of component loop i
1706 } // end of k1 loop
1707 } // end of k2 loop
1708 }
1709
1710 if (m_faceOffset[1] > -1) {
1711 #pragma omp for nowait
1712 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1713 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1714 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1715 for (index_t i=0; i < numComp; ++i) {
1716 const double f_0 = f[INDEX2(i,0,numComp)];
1717 const double f_1 = f[INDEX2(i,1,numComp)];
1718 const double f_2 = f[INDEX2(i,2,numComp)];
1719 const double f_3 = f[INDEX2(i,3,numComp)];
1720 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1721 } // end of component loop i
1722 } // end of k1 loop
1723 } // end of k2 loop
1724 }
1725
1726 if (m_faceOffset[2] > -1) {
1727 #pragma omp for nowait
1728 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1729 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1730 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1731 for (index_t i=0; i < numComp; ++i) {
1732 const double f_0 = f[INDEX2(i,0,numComp)];
1733 const double f_1 = f[INDEX2(i,1,numComp)];
1734 const double f_2 = f[INDEX2(i,2,numComp)];
1735 const double f_3 = f[INDEX2(i,3,numComp)];
1736 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1737 } // end of component loop i
1738 } // end of k1 loop
1739 } // end of k2 loop
1740 }
1741
1742 if (m_faceOffset[3] > -1) {
1743 #pragma omp for nowait
1744 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1745 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1746 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1747 for (index_t i=0; i < numComp; ++i) {
1748 const double f_0 = f[INDEX2(i,0,numComp)];
1749 const double f_1 = f[INDEX2(i,1,numComp)];
1750 const double f_2 = f[INDEX2(i,2,numComp)];
1751 const double f_3 = f[INDEX2(i,3,numComp)];
1752 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1753 } // end of component loop i
1754 } // end of k1 loop
1755 } // end of k2 loop
1756 }
1757
1758 if (m_faceOffset[4] > -1) {
1759 #pragma omp for nowait
1760 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1761 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1762 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1763 for (index_t i=0; i < numComp; ++i) {
1764 const double f_0 = f[INDEX2(i,0,numComp)];
1765 const double f_1 = f[INDEX2(i,1,numComp)];
1766 const double f_2 = f[INDEX2(i,2,numComp)];
1767 const double f_3 = f[INDEX2(i,3,numComp)];
1768 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1769 } // end of component loop i
1770 } // end of k1 loop
1771 } // end of k2 loop
1772 }
1773
1774 if (m_faceOffset[5] > -1) {
1775 #pragma omp for nowait
1776 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1777 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1778 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1779 for (index_t i=0; i < numComp; ++i) {
1780 const double f_0 = f[INDEX2(i,0,numComp)];
1781 const double f_1 = f[INDEX2(i,1,numComp)];
1782 const double f_2 = f[INDEX2(i,2,numComp)];
1783 const double f_3 = f[INDEX2(i,3,numComp)];
1784 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1785 } // end of component loop i
1786 } // end of k1 loop
1787 } // end of k2 loop
1788 }
1789
1790 #pragma omp critical
1791 for (index_t i=0; i<numComp; i++)
1792 integrals[i]+=int_local[i];
1793 } // end of parallel section
1794
1795 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1796 const double w_0 = m_dx[1]*m_dx[2];
1797 const double w_1 = m_dx[0]*m_dx[2];
1798 const double w_2 = m_dx[0]*m_dx[1];
1799 #pragma omp parallel