/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4657 - (show annotations)
Thu Feb 6 06:12:20 2014 UTC (5 years, 1 month ago) by jfenwick
Original Path: trunk/ripley/src/Brick.cpp
File size: 154421 byte(s)
I changed some files.
Updated copyright notices, added GeoComp.



1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <boost/scoped_array.hpp>
23
24 #ifdef USE_NETCDF
25 #include <netcdfcpp.h>
26 #endif
27
28 #if USE_SILO
29 #include <silo.h>
30 #ifdef ESYS_MPI
31 #include <pmpio.h>
32 #endif
33 #endif
34
35 #include <iomanip>
36
37 #include "esysUtils/EsysRandom.h"
38 #include "blocktools.h"
39
40
41 using namespace std;
42 using esysUtils::FileWriter;
43
44 namespace ripley {
45
46 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
47 double x1, double y1, double z1, int d0, int d1, int d2,
48 const std::vector<double>& points, const std::vector<int>& tags,
49 const simap_t& tagnamestonums) :
50 RipleyDomain(3)
51 {
52 // ignore subdivision parameters for serial run
53 if (m_mpiInfo->size == 1) {
54 d0=1;
55 d1=1;
56 d2=1;
57 }
58 bool warn=false;
59 // if number of subdivisions is non-positive, try to subdivide by the same
60 // ratio as the number of elements
61 if (d0<=0 && d1<=0 && d2<=0) {
62 warn=true;
63 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
64 d0=max(1, d0);
65 d1=max(1, (int)(d0*n1/(float)n0));
66 d2=m_mpiInfo->size/(d0*d1);
67 if (d0*d1*d2 != m_mpiInfo->size) {
68 // ratios not the same so leave "smallest" side undivided and try
69 // dividing 2 sides only
70 if (n0>=n1) {
71 if (n1>=n2) {
72 d0=d1=0;
73 d2=1;
74 } else {
75 d0=d2=0;
76 d1=1;
77 }
78 } else {
79 if (n0>=n2) {
80 d0=d1=0;
81 d2=1;
82 } else {
83 d0=1;
84 d1=d2=0;
85 }
86 }
87 }
88 }
89 if (d0<=0 && d1<=0) {
90 warn=true;
91 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
92 d1=m_mpiInfo->size/d0;
93 if (d0*d1*d2 != m_mpiInfo->size) {
94 // ratios not the same so subdivide side with more elements only
95 if (n0>n1) {
96 d0=0;
97 d1=1;
98 } else {
99 d0=1;
100 d1=0;
101 }
102 }
103 } else if (d0<=0 && d2<=0) {
104 warn=true;
105 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
106 d2=m_mpiInfo->size/d0;
107 if (d0*d1*d2 != m_mpiInfo->size) {
108 // ratios not the same so subdivide side with more elements only
109 if (n0>n2) {
110 d0=0;
111 d2=1;
112 } else {
113 d0=1;
114 d2=0;
115 }
116 }
117 } else if (d1<=0 && d2<=0) {
118 warn=true;
119 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
120 d2=m_mpiInfo->size/d1;
121 if (d0*d1*d2 != m_mpiInfo->size) {
122 // ratios not the same so subdivide side with more elements only
123 if (n1>n2) {
124 d1=0;
125 d2=1;
126 } else {
127 d1=1;
128 d2=0;
129 }
130 }
131 }
132 if (d0<=0) {
133 // d1,d2 are preset, determine d0
134 d0=m_mpiInfo->size/(d1*d2);
135 } else if (d1<=0) {
136 // d0,d2 are preset, determine d1
137 d1=m_mpiInfo->size/(d0*d2);
138 } else if (d2<=0) {
139 // d0,d1 are preset, determine d2
140 d2=m_mpiInfo->size/(d0*d1);
141 }
142
143 // ensure number of subdivisions is valid and nodes can be distributed
144 // among number of ranks
145 if (d0*d1*d2 != m_mpiInfo->size)
146 throw RipleyException("Invalid number of spatial subdivisions");
147
148 if (warn) {
149 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
150 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
151 }
152
153 double l0 = x1-x0;
154 double l1 = y1-y0;
155 double l2 = z1-z0;
156 m_dx[0] = l0/n0;
157 m_dx[1] = l1/n1;
158 m_dx[2] = l2/n2;
159
160 if ((n0+1)%d0 > 0) {
161 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
162 l0=m_dx[0]*n0;
163 cout << "Warning: Adjusted number of elements and length. N0="
164 << n0 << ", l0=" << l0 << endl;
165 }
166 if ((n1+1)%d1 > 0) {
167 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
168 l1=m_dx[1]*n1;
169 cout << "Warning: Adjusted number of elements and length. N1="
170 << n1 << ", l1=" << l1 << endl;
171 }
172 if ((n2+1)%d2 > 0) {
173 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
174 l2=m_dx[2]*n2;
175 cout << "Warning: Adjusted number of elements and length. N2="
176 << n2 << ", l2=" << l2 << endl;
177 }
178
179 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
180 throw RipleyException("Too few elements for the number of ranks");
181
182 m_gNE[0] = n0;
183 m_gNE[1] = n1;
184 m_gNE[2] = n2;
185 m_origin[0] = x0;
186 m_origin[1] = y0;
187 m_origin[2] = z0;
188 m_length[0] = l0;
189 m_length[1] = l1;
190 m_length[2] = l2;
191 m_NX[0] = d0;
192 m_NX[1] = d1;
193 m_NX[2] = d2;
194
195 // local number of elements (including overlap)
196 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
197 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
198 m_NE[0]++;
199 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
200 m_ownNE[0]--;
201
202 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
203 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
204 m_NE[1]++;
205 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
206 m_ownNE[1]--;
207
208 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
209 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
210 m_NE[2]++;
211 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
212 m_ownNE[2]--;
213
214 // local number of nodes
215 m_NN[0] = m_NE[0]+1;
216 m_NN[1] = m_NE[1]+1;
217 m_NN[2] = m_NE[2]+1;
218
219 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
220 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
221 if (m_offset[0] > 0)
222 m_offset[0]--;
223 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
224 if (m_offset[1] > 0)
225 m_offset[1]--;
226 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
227 if (m_offset[2] > 0)
228 m_offset[2]--;
229
230 populateSampleIds();
231 createPattern();
232
233 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
234 for (map<string, int>::const_iterator i = tagnamestonums.begin();
235 i != tagnamestonums.end(); i++) {
236 setTagMap(i->first, i->second);
237 }
238 addPoints(tags.size(), &points[0], &tags[0]);
239 }
240
241
242 Brick::~Brick()
243 {
244 Paso_SystemMatrixPattern_free(m_pattern);
245 Paso_Connector_free(m_connector);
246 delete assembler;
247 }
248
249 string Brick::getDescription() const
250 {
251 return "ripley::Brick";
252 }
253
254 bool Brick::operator==(const AbstractDomain& other) const
255 {
256 const Brick* o=dynamic_cast<const Brick*>(&other);
257 if (o) {
258 return (RipleyDomain::operator==(other) &&
259 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
260 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
261 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
262 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
263 }
264
265 return false;
266 }
267
268 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
269 const ReaderParameters& params) const
270 {
271 #ifdef USE_NETCDF
272 // check destination function space
273 int myN0, myN1, myN2;
274 if (out.getFunctionSpace().getTypeCode() == Nodes) {
275 myN0 = m_NN[0];
276 myN1 = m_NN[1];
277 myN2 = m_NN[2];
278 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
279 out.getFunctionSpace().getTypeCode() == ReducedElements) {
280 myN0 = m_NE[0];
281 myN1 = m_NE[1];
282 myN2 = m_NE[2];
283 } else
284 throw RipleyException("readNcGrid(): invalid function space for output data object");
285
286 if (params.first.size() != 3)
287 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
288
289 if (params.numValues.size() != 3)
290 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
291
292 if (params.multiplier.size() != 3)
293 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
294 for (size_t i=0; i<params.multiplier.size(); i++)
295 if (params.multiplier[i]<1)
296 throw RipleyException("readNcGrid(): all multipliers must be positive");
297
298 // check file existence and size
299 NcFile f(filename.c_str(), NcFile::ReadOnly);
300 if (!f.is_valid())
301 throw RipleyException("readNcGrid(): cannot open file");
302
303 NcVar* var = f.get_var(varname.c_str());
304 if (!var)
305 throw RipleyException("readNcGrid(): invalid variable name");
306
307 // TODO: rank>0 data support
308 const int numComp = out.getDataPointSize();
309 if (numComp > 1)
310 throw RipleyException("readNcGrid(): only scalar data supported");
311
312 const int dims = var->num_dims();
313 boost::scoped_array<long> edges(var->edges());
314
315 // is this a slice of the data object (dims!=3)?
316 // note the expected ordering of edges (as in numpy: z,y,x)
317 if ( (dims==3 && (params.numValues[2] > edges[0] ||
318 params.numValues[1] > edges[1] ||
319 params.numValues[0] > edges[2]))
320 || (dims==2 && params.numValues[2]>1)
321 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
322 throw RipleyException("readNcGrid(): not enough data in file");
323 }
324
325 // check if this rank contributes anything
326 if (params.first[0] >= m_offset[0]+myN0 ||
327 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
328 params.first[1] >= m_offset[1]+myN1 ||
329 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
330 params.first[2] >= m_offset[2]+myN2 ||
331 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
332 return;
333 }
334
335 // now determine how much this rank has to write
336
337 // first coordinates in data object to write to
338 const int first0 = max(0, params.first[0]-m_offset[0]);
339 const int first1 = max(0, params.first[1]-m_offset[1]);
340 const int first2 = max(0, params.first[2]-m_offset[2]);
341 // indices to first value in file (not accounting for reverse yet)
342 int idx0 = max(0, m_offset[0]-params.first[0]);
343 int idx1 = max(0, m_offset[1]-params.first[1]);
344 int idx2 = max(0, m_offset[2]-params.first[2]);
345 // number of values to read
346 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
347 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
348 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
349
350 // make sure we read the right block if going backwards through file
351 if (params.reverse[0])
352 idx0 = edges[dims-1]-num0-idx0;
353 if (dims>1 && params.reverse[1])
354 idx1 = edges[dims-2]-num1-idx1;
355 if (dims>2 && params.reverse[2])
356 idx2 = edges[dims-3]-num2-idx2;
357
358
359 vector<double> values(num0*num1*num2);
360 if (dims==3) {
361 var->set_cur(idx2, idx1, idx0);
362 var->get(&values[0], num2, num1, num0);
363 } else if (dims==2) {
364 var->set_cur(idx1, idx0);
365 var->get(&values[0], num1, num0);
366 } else {
367 var->set_cur(idx0);
368 var->get(&values[0], num0);
369 }
370
371 const int dpp = out.getNumDataPointsPerSample();
372 out.requireWrite();
373
374 // helpers for reversing
375 const int x0 = (params.reverse[0] ? num0-1 : 0);
376 const int x_mult = (params.reverse[0] ? -1 : 1);
377 const int y0 = (params.reverse[1] ? num1-1 : 0);
378 const int y_mult = (params.reverse[1] ? -1 : 1);
379 const int z0 = (params.reverse[2] ? num2-1 : 0);
380 const int z_mult = (params.reverse[2] ? -1 : 1);
381
382 for (index_t z=0; z<num2; z++) {
383 for (index_t y=0; y<num1; y++) {
384 #pragma omp parallel for
385 for (index_t x=0; x<num0; x++) {
386 const int baseIndex = first0+x*params.multiplier[0]
387 +(first1+y*params.multiplier[1])*myN0
388 +(first2+z*params.multiplier[2])*myN0*myN1;
389 const int srcIndex=(z0+z_mult*z)*num1*num0
390 +(y0+y_mult*y)*num0
391 +(x0+x_mult*x);
392 if (!isnan(values[srcIndex])) {
393 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
394 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
395 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
396 const int dataIndex = baseIndex+m0
397 +m1*myN0
398 +m2*myN0*myN1;
399 double* dest = out.getSampleDataRW(dataIndex);
400 for (index_t q=0; q<dpp; q++) {
401 *dest++ = values[srcIndex];
402 }
403 }
404 }
405 }
406 }
407 }
408 }
409 }
410 #else
411 throw RipleyException("readNcGrid(): not compiled with netCDF support");
412 #endif
413 }
414
415 void Brick::readBinaryGrid(escript::Data& out, string filename,
416 const ReaderParameters& params) const
417 {
418 // the mapping is not universally correct but should work on our
419 // supported platforms
420 switch (params.dataType) {
421 case DATATYPE_INT32:
422 readBinaryGridImpl<int>(out, filename, params);
423 break;
424 case DATATYPE_FLOAT32:
425 readBinaryGridImpl<float>(out, filename, params);
426 break;
427 case DATATYPE_FLOAT64:
428 readBinaryGridImpl<double>(out, filename, params);
429 break;
430 default:
431 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
432 }
433 }
434
435 template<typename ValueType>
436 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
437 const ReaderParameters& params) const
438 {
439 // check destination function space
440 int myN0, myN1, myN2;
441 if (out.getFunctionSpace().getTypeCode() == Nodes) {
442 myN0 = m_NN[0];
443 myN1 = m_NN[1];
444 myN2 = m_NN[2];
445 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
446 out.getFunctionSpace().getTypeCode() == ReducedElements) {
447 myN0 = m_NE[0];
448 myN1 = m_NE[1];
449 myN2 = m_NE[2];
450 } else
451 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
452
453 if (params.first.size() != 3)
454 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
455
456 if (params.numValues.size() != 3)
457 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
458
459 if (params.multiplier.size() != 3)
460 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
461 for (size_t i=0; i<params.multiplier.size(); i++)
462 if (params.multiplier[i]<1)
463 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
464
465 // check file existence and size
466 ifstream f(filename.c_str(), ifstream::binary);
467 if (f.fail()) {
468 throw RipleyException("readBinaryGrid(): cannot open file");
469 }
470 f.seekg(0, ios::end);
471 const int numComp = out.getDataPointSize();
472 const int filesize = f.tellg();
473 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
474 if (filesize < reqsize) {
475 f.close();
476 throw RipleyException("readBinaryGrid(): not enough data in file");
477 }
478
479 // check if this rank contributes anything
480 if (params.first[0] >= m_offset[0]+myN0 ||
481 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
482 params.first[1] >= m_offset[1]+myN1 ||
483 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
484 params.first[2] >= m_offset[2]+myN2 ||
485 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
486 f.close();
487 return;
488 }
489
490 // now determine how much this rank has to write
491
492 // first coordinates in data object to write to
493 const int first0 = max(0, params.first[0]-m_offset[0]);
494 const int first1 = max(0, params.first[1]-m_offset[1]);
495 const int first2 = max(0, params.first[2]-m_offset[2]);
496 // indices to first value in file
497 const int idx0 = max(0, m_offset[0]-params.first[0]);
498 const int idx1 = max(0, m_offset[1]-params.first[1]);
499 const int idx2 = max(0, m_offset[2]-params.first[2]);
500 // number of values to read
501 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
502 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
503 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
504
505 out.requireWrite();
506 vector<ValueType> values(num0*numComp);
507 const int dpp = out.getNumDataPointsPerSample();
508
509 for (int z=0; z<num2; z++) {
510 for (int y=0; y<num1; y++) {
511 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
512 +(idx2+z)*params.numValues[0]*params.numValues[1]);
513 f.seekg(fileofs*sizeof(ValueType));
514 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
515
516 for (int x=0; x<num0; x++) {
517 const int baseIndex = first0+x*params.multiplier[0]
518 +(first1+y*params.multiplier[1])*myN0
519 +(first2+z*params.multiplier[2])*myN0*myN1;
520 for (int m2=0; m2<params.multiplier[2]; m2++) {
521 for (int m1=0; m1<params.multiplier[1]; m1++) {
522 for (int m0=0; m0<params.multiplier[0]; m0++) {
523 const int dataIndex = baseIndex+m0
524 +m1*myN0
525 +m2*myN0*myN1;
526 double* dest = out.getSampleDataRW(dataIndex);
527 for (int c=0; c<numComp; c++) {
528 ValueType val = values[x*numComp+c];
529
530 if (params.byteOrder != BYTEORDER_NATIVE) {
531 char* cval = reinterpret_cast<char*>(&val);
532 // this will alter val!!
533 byte_swap32(cval);
534 }
535 if (!std::isnan(val)) {
536 for (int q=0; q<dpp; q++) {
537 *dest++ = static_cast<double>(val);
538 }
539 }
540 }
541 }
542 }
543 }
544 }
545 }
546 }
547
548 f.close();
549 }
550
551 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
552 int byteOrder, int dataType) const
553 {
554 // the mapping is not universally correct but should work on our
555 // supported platforms
556 switch (dataType) {
557 case DATATYPE_INT32:
558 writeBinaryGridImpl<int>(in, filename, byteOrder);
559 break;
560 case DATATYPE_FLOAT32:
561 writeBinaryGridImpl<float>(in, filename, byteOrder);
562 break;
563 case DATATYPE_FLOAT64:
564 writeBinaryGridImpl<double>(in, filename, byteOrder);
565 break;
566 default:
567 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
568 }
569 }
570
571 template<typename ValueType>
572 void Brick::writeBinaryGridImpl(const escript::Data& in,
573 const string& filename, int byteOrder) const
574 {
575 // check function space and determine number of points
576 int myN0, myN1, myN2;
577 int totalN0, totalN1, totalN2;
578 if (in.getFunctionSpace().getTypeCode() == Nodes) {
579 myN0 = m_NN[0];
580 myN1 = m_NN[1];
581 myN2 = m_NN[2];
582 totalN0 = m_gNE[0]+1;
583 totalN1 = m_gNE[1]+1;
584 totalN2 = m_gNE[2]+1;
585 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
586 in.getFunctionSpace().getTypeCode() == ReducedElements) {
587 myN0 = m_NE[0];
588 myN1 = m_NE[1];
589 myN2 = m_NE[2];
590 totalN0 = m_gNE[0];
591 totalN1 = m_gNE[1];
592 totalN2 = m_gNE[2];
593 } else
594 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
595
596 const int numComp = in.getDataPointSize();
597 const int dpp = in.getNumDataPointsPerSample();
598 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
599
600 if (numComp > 1 || dpp > 1)
601 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
602
603 // from here on we know that each sample consists of one value
604 FileWriter fw;
605 fw.openFile(filename, fileSize);
606 MPIBarrier();
607
608 for (index_t z=0; z<myN2; z++) {
609 for (index_t y=0; y<myN1; y++) {
610 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
611 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
612 ostringstream oss;
613
614 for (index_t x=0; x<myN0; x++) {
615 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
616 ValueType fvalue = static_cast<ValueType>(*sample);
617 if (byteOrder == BYTEORDER_NATIVE) {
618 oss.write((char*)&fvalue, sizeof(fvalue));
619 } else {
620 char* value = reinterpret_cast<char*>(&fvalue);
621 oss.write(byte_swap32(value), sizeof(fvalue));
622 }
623 }
624 fw.writeAt(oss, fileofs);
625 }
626 }
627 fw.close();
628 }
629
630 void Brick::dump(const string& fileName) const
631 {
632 #if USE_SILO
633 string fn(fileName);
634 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
635 fn+=".silo";
636 }
637
638 int driver=DB_HDF5;
639 string siloPath;
640 DBfile* dbfile = NULL;
641
642 #ifdef ESYS_MPI
643 PMPIO_baton_t* baton = NULL;
644 const int NUM_SILO_FILES = 1;
645 const char* blockDirFmt = "/block%04d";
646 #endif
647
648 if (m_mpiInfo->size > 1) {
649 #ifdef ESYS_MPI
650 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
651 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
652 PMPIO_DefaultClose, (void*)&driver);
653 // try the fallback driver in case of error
654 if (!baton && driver != DB_PDB) {
655 driver = DB_PDB;
656 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
657 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
658 PMPIO_DefaultClose, (void*)&driver);
659 }
660 if (baton) {
661 char str[64];
662 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
663 siloPath = str;
664 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
665 }
666 #endif
667 } else {
668 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669 getDescription().c_str(), driver);
670 // try the fallback driver in case of error
671 if (!dbfile && driver != DB_PDB) {
672 driver = DB_PDB;
673 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
674 getDescription().c_str(), driver);
675 }
676 }
677
678 if (!dbfile)
679 throw RipleyException("dump: Could not create Silo file");
680
681 /*
682 if (driver==DB_HDF5) {
683 // gzip level 1 already provides good compression with minimal
684 // performance penalty. Some tests showed that gzip levels >3 performed
685 // rather badly on escript data both in terms of time and space
686 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
687 }
688 */
689
690 boost::scoped_ptr<double> x(new double[m_NN[0]]);
691 boost::scoped_ptr<double> y(new double[m_NN[1]]);
692 boost::scoped_ptr<double> z(new double[m_NN[2]]);
693 double* coords[3] = { x.get(), y.get(), z.get() };
694 #pragma omp parallel
695 {
696 #pragma omp for
697 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
698 coords[0][i0]=getLocalCoordinate(i0, 0);
699 }
700 #pragma omp for
701 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
702 coords[1][i1]=getLocalCoordinate(i1, 1);
703 }
704 #pragma omp for
705 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
706 coords[2][i2]=getLocalCoordinate(i2, 2);
707 }
708 }
709 int* dims = const_cast<int*>(getNumNodesPerDim());
710
711 // write mesh
712 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
713 DB_COLLINEAR, NULL);
714
715 // write node ids
716 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
717 NULL, 0, DB_INT, DB_NODECENT, NULL);
718
719 // write element ids
720 dims = const_cast<int*>(getNumElementsPerDim());
721 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
722 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
723
724 // rank 0 writes multimesh and multivar
725 if (m_mpiInfo->rank == 0) {
726 vector<string> tempstrings;
727 vector<char*> names;
728 for (dim_t i=0; i<m_mpiInfo->size; i++) {
729 stringstream path;
730 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
731 tempstrings.push_back(path.str());
732 names.push_back((char*)tempstrings.back().c_str());
733 }
734 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
735 DBSetDir(dbfile, "/");
736 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
737 &types[0], NULL);
738 tempstrings.clear();
739 names.clear();
740 for (dim_t i=0; i<m_mpiInfo->size; i++) {
741 stringstream path;
742 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
743 tempstrings.push_back(path.str());
744 names.push_back((char*)tempstrings.back().c_str());
745 }
746 types.assign(m_mpiInfo->size, DB_QUADVAR);
747 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
748 &types[0], NULL);
749 tempstrings.clear();
750 names.clear();
751 for (dim_t i=0; i<m_mpiInfo->size; i++) {
752 stringstream path;
753 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
754 tempstrings.push_back(path.str());
755 names.push_back((char*)tempstrings.back().c_str());
756 }
757 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
758 &types[0], NULL);
759 }
760
761 if (m_mpiInfo->size > 1) {
762 #ifdef ESYS_MPI
763 PMPIO_HandOffBaton(baton, dbfile);
764 PMPIO_Finish(baton);
765 #endif
766 } else {
767 DBClose(dbfile);
768 }
769
770 #else // USE_SILO
771 throw RipleyException("dump: no Silo support");
772 #endif
773 }
774
775 const int* Brick::borrowSampleReferenceIDs(int fsType) const
776 {
777 switch (fsType) {
778 case Nodes:
779 case ReducedNodes: //FIXME: reduced
780 return &m_nodeId[0];
781 case DegreesOfFreedom:
782 case ReducedDegreesOfFreedom: //FIXME: reduced
783 return &m_dofId[0];
784 case Elements:
785 case ReducedElements:
786 return &m_elementId[0];
787 case FaceElements:
788 case ReducedFaceElements:
789 return &m_faceId[0];
790 default:
791 break;
792 }
793
794 stringstream msg;
795 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
796 throw RipleyException(msg.str());
797 }
798
799 bool Brick::ownSample(int fsType, index_t id) const
800 {
801 if (getMPISize()==1)
802 return true;
803
804 switch (fsType) {
805 case Nodes:
806 case ReducedNodes: //FIXME: reduced
807 return (m_dofMap[id] < getNumDOF());
808 case DegreesOfFreedom:
809 case ReducedDegreesOfFreedom:
810 return true;
811 case Elements:
812 case ReducedElements:
813 {
814 // check ownership of element's _last_ node
815 const index_t x=id%m_NE[0] + 1;
816 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
817 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
818 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
819 }
820 case FaceElements:
821 case ReducedFaceElements:
822 {
823 // check ownership of face element's last node
824 dim_t n=0;
825 for (size_t i=0; i<6; i++) {
826 n+=m_faceCount[i];
827 if (id<n) {
828 const index_t j=id-n+m_faceCount[i];
829 if (i>=4) { // front or back
830 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
831 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
832 } else if (i>=2) { // bottom or top
833 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
834 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
835 } else { // left or right
836 const index_t first=(i==0 ? 0 : m_NN[0]-1);
837 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
838 }
839 }
840 }
841 return false;
842 }
843 default:
844 break;
845 }
846
847 stringstream msg;
848 msg << "ownSample: invalid function space type " << fsType;
849 throw RipleyException(msg.str());
850 }
851
852 void Brick::setToNormal(escript::Data& out) const
853 {
854 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
855 out.requireWrite();
856 #pragma omp parallel
857 {
858 if (m_faceOffset[0] > -1) {
859 #pragma omp for nowait
860 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
861 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
862 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
863 // set vector at four quadrature points
864 *o++ = -1.; *o++ = 0.; *o++ = 0.;
865 *o++ = -1.; *o++ = 0.; *o++ = 0.;
866 *o++ = -1.; *o++ = 0.; *o++ = 0.;
867 *o++ = -1.; *o++ = 0.; *o = 0.;
868 }
869 }
870 }
871
872 if (m_faceOffset[1] > -1) {
873 #pragma omp for nowait
874 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
875 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
876 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
877 // set vector at four quadrature points
878 *o++ = 1.; *o++ = 0.; *o++ = 0.;
879 *o++ = 1.; *o++ = 0.; *o++ = 0.;
880 *o++ = 1.; *o++ = 0.; *o++ = 0.;
881 *o++ = 1.; *o++ = 0.; *o = 0.;
882 }
883 }
884 }
885
886 if (m_faceOffset[2] > -1) {
887 #pragma omp for nowait
888 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
889 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
890 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
891 // set vector at four quadrature points
892 *o++ = 0.; *o++ = -1.; *o++ = 0.;
893 *o++ = 0.; *o++ = -1.; *o++ = 0.;
894 *o++ = 0.; *o++ = -1.; *o++ = 0.;
895 *o++ = 0.; *o++ = -1.; *o = 0.;
896 }
897 }
898 }
899
900 if (m_faceOffset[3] > -1) {
901 #pragma omp for nowait
902 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
903 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
904 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
905 // set vector at four quadrature points
906 *o++ = 0.; *o++ = 1.; *o++ = 0.;
907 *o++ = 0.; *o++ = 1.; *o++ = 0.;
908 *o++ = 0.; *o++ = 1.; *o++ = 0.;
909 *o++ = 0.; *o++ = 1.; *o = 0.;
910 }
911 }
912 }
913
914 if (m_faceOffset[4] > -1) {
915 #pragma omp for nowait
916 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
917 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
918 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
919 // set vector at four quadrature points
920 *o++ = 0.; *o++ = 0.; *o++ = -1.;
921 *o++ = 0.; *o++ = 0.; *o++ = -1.;
922 *o++ = 0.; *o++ = 0.; *o++ = -1.;
923 *o++ = 0.; *o++ = 0.; *o = -1.;
924 }
925 }
926 }
927
928 if (m_faceOffset[5] > -1) {
929 #pragma omp for nowait
930 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
931 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
932 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
933 // set vector at four quadrature points
934 *o++ = 0.; *o++ = 0.; *o++ = 1.;
935 *o++ = 0.; *o++ = 0.; *o++ = 1.;
936 *o++ = 0.; *o++ = 0.; *o++ = 1.;
937 *o++ = 0.; *o++ = 0.; *o = 1.;
938 }
939 }
940 }
941 } // end of parallel section
942 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
943 out.requireWrite();
944 #pragma omp parallel
945 {
946 if (m_faceOffset[0] > -1) {
947 #pragma omp for nowait
948 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
949 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
950 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
951 *o++ = -1.;
952 *o++ = 0.;
953 *o = 0.;
954 }
955 }
956 }
957
958 if (m_faceOffset[1] > -1) {
959 #pragma omp for nowait
960 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
961 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
962 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
963 *o++ = 1.;
964 *o++ = 0.;
965 *o = 0.;
966 }
967 }
968 }
969
970 if (m_faceOffset[2] > -1) {
971 #pragma omp for nowait
972 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
973 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
974 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
975 *o++ = 0.;
976 *o++ = -1.;
977 *o = 0.;
978 }
979 }
980 }
981
982 if (m_faceOffset[3] > -1) {
983 #pragma omp for nowait
984 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
985 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
986 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
987 *o++ = 0.;
988 *o++ = 1.;
989 *o = 0.;
990 }
991 }
992 }
993
994 if (m_faceOffset[4] > -1) {
995 #pragma omp for nowait
996 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
997 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
998 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
999 *o++ = 0.;
1000 *o++ = 0.;
1001 *o = -1.;
1002 }
1003 }
1004 }
1005
1006 if (m_faceOffset[5] > -1) {
1007 #pragma omp for nowait
1008 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1009 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1010 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1011 *o++ = 0.;
1012 *o++ = 0.;
1013 *o = 1.;
1014 }
1015 }
1016 }
1017 } // end of parallel section
1018
1019 } else {
1020 stringstream msg;
1021 msg << "setToNormal: invalid function space type "
1022 << out.getFunctionSpace().getTypeCode();
1023 throw RipleyException(msg.str());
1024 }
1025 }
1026
1027 void Brick::setToSize(escript::Data& out) const
1028 {
1029 if (out.getFunctionSpace().getTypeCode() == Elements
1030 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1031 out.requireWrite();
1032 const dim_t numQuad=out.getNumDataPointsPerSample();
1033 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1034 #pragma omp parallel for
1035 for (index_t k = 0; k < getNumElements(); ++k) {
1036 double* o = out.getSampleDataRW(k);
1037 fill(o, o+numQuad, size);
1038 }
1039 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1040 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1041 out.requireWrite();
1042 const dim_t numQuad=out.getNumDataPointsPerSample();
1043 #pragma omp parallel
1044 {
1045 if (m_faceOffset[0] > -1) {
1046 const double size=min(m_dx[1],m_dx[2]);
1047 #pragma omp for nowait
1048 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1049 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1050 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1051 fill(o, o+numQuad, size);
1052 }
1053 }
1054 }
1055
1056 if (m_faceOffset[1] > -1) {
1057 const double size=min(m_dx[1],m_dx[2]);
1058 #pragma omp for nowait
1059 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1060 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1061 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1062 fill(o, o+numQuad, size);
1063 }
1064 }
1065 }
1066
1067 if (m_faceOffset[2] > -1) {
1068 const double size=min(m_dx[0],m_dx[2]);
1069 #pragma omp for nowait
1070 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1071 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1072 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1073 fill(o, o+numQuad, size);
1074 }
1075 }
1076 }
1077
1078 if (m_faceOffset[3] > -1) {
1079 const double size=min(m_dx[0],m_dx[2]);
1080 #pragma omp for nowait
1081 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1082 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1083 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1084 fill(o, o+numQuad, size);
1085 }
1086 }
1087 }
1088
1089 if (m_faceOffset[4] > -1) {
1090 const double size=min(m_dx[0],m_dx[1]);
1091 #pragma omp for nowait
1092 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1093 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1094 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1095 fill(o, o+numQuad, size);
1096 }
1097 }
1098 }
1099
1100 if (m_faceOffset[5] > -1) {
1101 const double size=min(m_dx[0],m_dx[1]);
1102 #pragma omp for nowait
1103 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1104 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1105 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1106 fill(o, o+numQuad, size);
1107 }
1108 }
1109 }
1110 } // end of parallel section
1111
1112 } else {
1113 stringstream msg;
1114 msg << "setToSize: invalid function space type "
1115 << out.getFunctionSpace().getTypeCode();
1116 throw RipleyException(msg.str());
1117 }
1118 }
1119
1120 void Brick::Print_Mesh_Info(const bool full) const
1121 {
1122 RipleyDomain::Print_Mesh_Info(full);
1123 if (full) {
1124 cout << " Id Coordinates" << endl;
1125 cout.precision(15);
1126 cout.setf(ios::scientific, ios::floatfield);
1127 for (index_t i=0; i < getNumNodes(); i++) {
1128 cout << " " << setw(5) << m_nodeId[i]
1129 << " " << getLocalCoordinate(i%m_NN[0], 0)
1130 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1131 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1132 }
1133 }
1134 }
1135
1136
1137 //protected
1138 void Brick::assembleCoordinates(escript::Data& arg) const
1139 {
1140 escriptDataC x = arg.getDataC();
1141 int numDim = m_numDim;
1142 if (!isDataPointShapeEqual(&x, 1, &numDim))
1143 throw RipleyException("setToX: Invalid Data object shape");
1144 if (!numSamplesEqual(&x, 1, getNumNodes()))
1145 throw RipleyException("setToX: Illegal number of samples in Data object");
1146
1147 arg.requireWrite();
1148 #pragma omp parallel for
1149 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1150 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1151 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1152 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1153 point[0] = getLocalCoordinate(i0, 0);
1154 point[1] = getLocalCoordinate(i1, 1);
1155 point[2] = getLocalCoordinate(i2, 2);
1156 }
1157 }
1158 }
1159 }
1160
1161 //protected
1162 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1163 {
1164 const dim_t numComp = in.getDataPointSize();
1165 const double C0 = .044658198738520451079;
1166 const double C1 = .16666666666666666667;
1167 const double C2 = .21132486540518711775;
1168 const double C3 = .25;
1169 const double C4 = .5;
1170 const double C5 = .62200846792814621559;
1171 const double C6 = .78867513459481288225;
1172
1173 if (out.getFunctionSpace().getTypeCode() == Elements) {
1174 out.requireWrite();
1175 #pragma omp parallel
1176 {
1177 vector<double> f_000(numComp);
1178 vector<double> f_001(numComp);
1179 vector<double> f_010(numComp);
1180 vector<double> f_011(numComp);
1181 vector<double> f_100(numComp);
1182 vector<double> f_101(numComp);
1183 vector<double> f_110(numComp);
1184 vector<double> f_111(numComp);
1185 #pragma omp for
1186 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1187 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1188 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1189 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1197 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1198 for (index_t i=0; i < numComp; ++i) {
1199 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1200 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1201 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1202 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1203 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1204 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1205 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1206 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1207 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1208 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1209 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1210 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1211 o[INDEX3(i,0,0,numComp,3)] = V0;
1212 o[INDEX3(i,1,0,numComp,3)] = V4;
1213 o[INDEX3(i,2,0,numComp,3)] = V8;
1214 o[INDEX3(i,0,1,numComp,3)] = V0;
1215 o[INDEX3(i,1,1,numComp,3)] = V5;
1216 o[INDEX3(i,2,1,numComp,3)] = V9;
1217 o[INDEX3(i,0,2,numComp,3)] = V1;
1218 o[INDEX3(i,1,2,numComp,3)] = V4;
1219 o[INDEX3(i,2,2,numComp,3)] = V10;
1220 o[INDEX3(i,0,3,numComp,3)] = V1;
1221 o[INDEX3(i,1,3,numComp,3)] = V5;
1222 o[INDEX3(i,2,3,numComp,3)] = V11;
1223 o[INDEX3(i,0,4,numComp,3)] = V2;
1224 o[INDEX3(i,1,4,numComp,3)] = V6;
1225 o[INDEX3(i,2,4,numComp,3)] = V8;
1226 o[INDEX3(i,0,5,numComp,3)] = V2;
1227 o[INDEX3(i,1,5,numComp,3)] = V7;
1228 o[INDEX3(i,2,5,numComp,3)] = V9;
1229 o[INDEX3(i,0,6,numComp,3)] = V3;
1230 o[INDEX3(i,1,6,numComp,3)] = V6;
1231 o[INDEX3(i,2,6,numComp,3)] = V10;
1232 o[INDEX3(i,0,7,numComp,3)] = V3;
1233 o[INDEX3(i,1,7,numComp,3)] = V7;
1234 o[INDEX3(i,2,7,numComp,3)] = V11;
1235 } // end of component loop i
1236 } // end of k0 loop
1237 } // end of k1 loop
1238 } // end of k2 loop
1239 } // end of parallel section
1240 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1241 out.requireWrite();
1242 #pragma omp parallel
1243 {
1244 vector<double> f_000(numComp);
1245 vector<double> f_001(numComp);
1246 vector<double> f_010(numComp);
1247 vector<double> f_011(numComp);
1248 vector<double> f_100(numComp);
1249 vector<double> f_101(numComp);
1250 vector<double> f_110(numComp);
1251 vector<double> f_111(numComp);
1252 #pragma omp for
1253 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1254 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1255 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1256 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1264 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1265 for (index_t i=0; i < numComp; ++i) {
1266 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1267 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1268 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1269 } // end of component loop i
1270 } // end of k0 loop
1271 } // end of k1 loop
1272 } // end of k2 loop
1273 } // end of parallel section
1274 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1275 out.requireWrite();
1276 #pragma omp parallel
1277 {
1278 vector<double> f_000(numComp);
1279 vector<double> f_001(numComp);
1280 vector<double> f_010(numComp);
1281 vector<double> f_011(numComp);
1282 vector<double> f_100(numComp);
1283 vector<double> f_101(numComp);
1284 vector<double> f_110(numComp);
1285 vector<double> f_111(numComp);
1286 if (m_faceOffset[0] > -1) {
1287 #pragma omp for nowait
1288 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1289 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1290 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1299 for (index_t i=0; i < numComp; ++i) {
1300 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1301 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1302 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1303 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1304 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1305 o[INDEX3(i,1,0,numComp,3)] = V0;
1306 o[INDEX3(i,2,0,numComp,3)] = V2;
1307 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1308 o[INDEX3(i,1,1,numComp,3)] = V0;
1309 o[INDEX3(i,2,1,numComp,3)] = V3;
1310 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1311 o[INDEX3(i,1,2,numComp,3)] = V1;
1312 o[INDEX3(i,2,2,numComp,3)] = V2;
1313 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1314 o[INDEX3(i,1,3,numComp,3)] = V1;
1315 o[INDEX3(i,2,3,numComp,3)] = V3;
1316 } // end of component loop i
1317 } // end of k1 loop
1318 } // end of k2 loop
1319 } // end of face 0
1320 if (m_faceOffset[1] > -1) {
1321 #pragma omp for nowait
1322 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1323 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1324 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1332 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1333 for (index_t i=0; i < numComp; ++i) {
1334 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1335 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1336 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1337 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1338 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1339 o[INDEX3(i,1,0,numComp,3)] = V0;
1340 o[INDEX3(i,2,0,numComp,3)] = V2;
1341 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1342 o[INDEX3(i,1,1,numComp,3)] = V0;
1343 o[INDEX3(i,2,1,numComp,3)] = V3;
1344 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1345 o[INDEX3(i,1,2,numComp,3)] = V1;
1346 o[INDEX3(i,2,2,numComp,3)] = V2;
1347 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1348 o[INDEX3(i,1,3,numComp,3)] = V1;
1349 o[INDEX3(i,2,3,numComp,3)] = V3;
1350 } // end of component loop i
1351 } // end of k1 loop
1352 } // end of k2 loop
1353 } // end of face 1
1354 if (m_faceOffset[2] > -1) {
1355 #pragma omp for nowait
1356 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1357 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1358 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1367 for (index_t i=0; i < numComp; ++i) {
1368 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1369 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1370 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1371 o[INDEX3(i,0,0,numComp,3)] = V0;
1372 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1373 o[INDEX3(i,2,0,numComp,3)] = V1;
1374 o[INDEX3(i,0,1,numComp,3)] = V0;
1375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1376 o[INDEX3(i,2,1,numComp,3)] = V2;
1377 o[INDEX3(i,0,2,numComp,3)] = V0;
1378 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1379 o[INDEX3(i,2,2,numComp,3)] = V1;
1380 o[INDEX3(i,0,3,numComp,3)] = V0;
1381 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1382 o[INDEX3(i,2,3,numComp,3)] = V2;
1383 } // end of component loop i
1384 } // end of k0 loop
1385 } // end of k2 loop
1386 } // end of face 2
1387 if (m_faceOffset[3] > -1) {
1388 #pragma omp for nowait
1389 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1390 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1391 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1400 for (index_t i=0; i < numComp; ++i) {
1401 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1402 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1403 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1404 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1405 o[INDEX3(i,0,0,numComp,3)] = V0;
1406 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1407 o[INDEX3(i,2,0,numComp,3)] = V2;
1408 o[INDEX3(i,0,1,numComp,3)] = V0;
1409 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1410 o[INDEX3(i,2,1,numComp,3)] = V3;
1411 o[INDEX3(i,0,2,numComp,3)] = V1;
1412 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1413 o[INDEX3(i,2,2,numComp,3)] = V2;
1414 o[INDEX3(i,0,3,numComp,3)] = V1;
1415 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1416 o[INDEX3(i,2,3,numComp,3)] = V3;
1417 } // end of component loop i
1418 } // end of k0 loop
1419 } // end of k2 loop
1420 } // end of face 3
1421 if (m_faceOffset[4] > -1) {
1422 #pragma omp for nowait
1423 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1424 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1425 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1434 for (index_t i=0; i < numComp; ++i) {
1435 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1436 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1437 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1438 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1439 o[INDEX3(i,0,0,numComp,3)] = V0;
1440 o[INDEX3(i,1,0,numComp,3)] = V2;
1441 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1442 o[INDEX3(i,0,1,numComp,3)] = V0;
1443 o[INDEX3(i,1,1,numComp,3)] = V3;
1444 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1445 o[INDEX3(i,0,2,numComp,3)] = V1;
1446 o[INDEX3(i,1,2,numComp,3)] = V2;
1447 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1448 o[INDEX3(i,0,3,numComp,3)] = V1;
1449 o[INDEX3(i,1,3,numComp,3)] = V3;
1450 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1451 } // end of component loop i
1452 } // end of k0 loop
1453 } // end of k1 loop
1454 } // end of face 4
1455 if (m_faceOffset[5] > -1) {
1456 #pragma omp for nowait
1457 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1458 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1459 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1468 for (index_t i=0; i < numComp; ++i) {
1469 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1470 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1471 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1472 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1473 o[INDEX3(i,0,0,numComp,3)] = V0;
1474 o[INDEX3(i,1,0,numComp,3)] = V2;
1475 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1476 o[INDEX3(i,0,1,numComp,3)] = V0;
1477 o[INDEX3(i,1,1,numComp,3)] = V3;
1478 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1479 o[INDEX3(i,0,2,numComp,3)] = V1;
1480 o[INDEX3(i,1,2,numComp,3)] = V2;
1481 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1482 o[INDEX3(i,0,3,numComp,3)] = V1;
1483 o[INDEX3(i,1,3,numComp,3)] = V3;
1484 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1485 } // end of component loop i
1486 } // end of k0 loop
1487 } // end of k1 loop
1488 } // end of face 5
1489 } // end of parallel section
1490 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1491 out.requireWrite();
1492 #pragma omp parallel
1493 {
1494 vector<double> f_000(numComp);
1495 vector<double> f_001(numComp);
1496 vector<double> f_010(numComp);
1497 vector<double> f_011(numComp);
1498 vector<double> f_100(numComp);
1499 vector<double> f_101(numComp);
1500 vector<double> f_110(numComp);
1501 vector<double> f_111(numComp);
1502 if (m_faceOffset[0] > -1) {
1503 #pragma omp for nowait
1504 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1505 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1506 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1515 for (index_t i=0; i < numComp; ++i) {
1516 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1517 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1518 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1519 } // end of component loop i
1520 } // end of k1 loop
1521 } // end of k2 loop
1522 } // end of face 0
1523 if (m_faceOffset[1] > -1) {
1524 #pragma omp for nowait
1525 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1526 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1527 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1536 for (index_t i=0; i < numComp; ++i) {
1537 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1538 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1539 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1540 } // end of component loop i
1541 } // end of k1 loop
1542 } // end of k2 loop
1543 } // end of face 1
1544 if (m_faceOffset[2] > -1) {
1545 #pragma omp for nowait
1546 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1547 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1548 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1549 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1550 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1551 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1552 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1553 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1554 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1555 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1556 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1557 for (index_t i=0; i < numComp; ++i) {
1558 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1559 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1560 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1561 } // end of component loop i
1562 } // end of k0 loop
1563 } // end of k2 loop
1564 } // end of face 2
1565 if (m_faceOffset[3] > -1) {
1566 #pragma omp for nowait
1567 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1568 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1569 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1578 for (index_t i=0; i < numComp; ++i) {
1579 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1580 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1581 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1582 } // end of component loop i
1583 } // end of k0 loop
1584 } // end of k2 loop
1585 } // end of face 3
1586 if (m_faceOffset[4] > -1) {
1587 #pragma omp for nowait
1588 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1589 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1590 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1591 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1592 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1593 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1594 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1595 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1596 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1597 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1598 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1599 for (index_t i=0; i < numComp; ++i) {
1600 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1601 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1602 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1603 } // end of component loop i
1604 } // end of k0 loop
1605 } // end of k1 loop
1606 } // end of face 4
1607 if (m_faceOffset[5] > -1) {
1608 #pragma omp for nowait
1609 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1610 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1611 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1620 for (index_t i=0; i < numComp; ++i) {
1621 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1622 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1623 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1624 } // end of component loop i
1625 } // end of k0 loop
1626 } // end of k1 loop
1627 } // end of face 5
1628 } // end of parallel section
1629 }
1630 }
1631
1632 //protected
1633 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1634 {
1635 const dim_t numComp = arg.getDataPointSize();
1636 const index_t left = (m_offset[0]==0 ? 0 : 1);
1637 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1638 const index_t front = (m_offset[2]==0 ? 0 : 1);
1639 const int fs = arg.getFunctionSpace().getTypeCode();
1640 if (fs == Elements && arg.actsExpanded()) {
1641 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1642 #pragma omp parallel
1643 {
1644 vector<double> int_local(numComp, 0);
1645 #pragma omp for nowait
1646 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1647 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1648 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1649 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1650 for (index_t i=0; i < numComp; ++i) {
1651 const double f_0 = f[INDEX2(i,0,numComp)];
1652 const double f_1 = f[INDEX2(i,1,numComp)];
1653 const double f_2 = f[INDEX2(i,2,numComp)];
1654 const double f_3 = f[INDEX2(i,3,numComp)];
1655 const double f_4 = f[INDEX2(i,4,numComp)];
1656 const double f_5 = f[INDEX2(i,5,numComp)];
1657 const double f_6 = f[INDEX2(i,6,numComp)];
1658 const double f_7 = f[INDEX2(i,7,numComp)];
1659 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1660 } // end of component loop i
1661 } // end of k0 loop
1662 } // end of k1 loop
1663 } // end of k2 loop
1664
1665 #pragma omp critical
1666 for (index_t i=0; i<numComp; i++)
1667 integrals[i]+=int_local[i];
1668 } // end of parallel section
1669
1670 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1671 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1672 #pragma omp parallel
1673 {
1674 vector<double> int_local(numComp, 0);
1675 #pragma omp for nowait
1676 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1677 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1678 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1679 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1680 for (index_t i=0; i < numComp; ++i) {
1681 int_local[i]+=f[i]*w_0;
1682 } // end of component loop i
1683 } // end of k0 loop
1684 } // end of k1 loop
1685 } // end of k2 loop
1686
1687 #pragma omp critical
1688 for (index_t i=0; i<numComp; i++)
1689 integrals[i]+=int_local[i];
1690 } // end of parallel section
1691
1692 } else if (fs == FaceElements && arg.actsExpanded()) {
1693 const double w_0 = m_dx[1]*m_dx[2]/4.;
1694 const double w_1 = m_dx[0]*m_dx[2]/4.;
1695 const double w_2 = m_dx[0]*m_dx[1]/4.;
1696 #pragma omp parallel
1697 {
1698 vector<double> int_local(numComp, 0);
1699 if (m_faceOffset[0] > -1) {
1700 #pragma omp for nowait
1701 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1702 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1703 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1704 for (index_t i=0; i < numComp; ++i) {
1705 const double f_0 = f[INDEX2(i,0,numComp)];
1706 const double f_1 = f[INDEX2(i,1,numComp)];
1707 const double f_2 = f[INDEX2(i,2,numComp)];
1708 const double f_3 = f[INDEX2(i,3,numComp)];
1709 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1710 } // end of component loop i
1711 } // end of k1 loop
1712 } // end of k2 loop
1713 }
1714
1715 if (m_faceOffset[1] > -1) {
1716 #pragma omp for nowait
1717 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1718 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1719 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1720 for (index_t i=0; i < numComp; ++i) {
1721 const double f_0 = f[INDEX2(i,0,numComp)];
1722 const double f_1 = f[INDEX2(i,1,numComp)];
1723 const double f_2 = f[INDEX2(i,2,numComp)];
1724 const double f_3 = f[INDEX2(i,3,numComp)];
1725 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1726 } // end of component loop i
1727 } // end of k1 loop
1728 } // end of k2 loop
1729 }
1730
1731 if (m_faceOffset[2] > -1) {
1732 #pragma omp for nowait
1733 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1734 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1735 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1736 for (index_t i=0; i < numComp; ++i) {
1737 const double f_0 = f[INDEX2(i,0,numComp)];
1738 const double f_1 = f[INDEX2(i,1,numComp)];
1739 const double f_2 = f[INDEX2(i,2,numComp)];
1740 const double f_3 = f[INDEX2(i,3,numComp)];
1741 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1742 } // end of component loop i
1743 } // end of k1 loop
1744 } // end of k2 loop
1745 }
1746
1747 if (m_faceOffset[3] > -1) {
1748 #pragma omp for nowait
1749 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1750 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1751 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1752 for (index_t i=0; i < numComp; ++i) {
1753 const double f_0 = f[INDEX2(i,0,numComp)];
1754 const double f_1 = f[INDEX2(i,1,numComp)];
1755 const double f_2 = f[INDEX2(i,2,numComp)];
1756 const double f_3 = f[INDEX2(i,3,numComp)];
1757 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1758 } // end of component loop i
1759 } // end of k1 loop
1760 } // end of k2 loop
1761 }
1762
1763 if (m_faceOffset[4] > -1) {
1764 #pragma omp for nowait
1765 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1766 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1767 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1768 for (index_t i=0; i < numComp; ++i) {
1769 const double f_0 = f[INDEX2(i,0,numComp)];
1770 const double f_1 = f[INDEX2(i,1,numComp)];
1771 const double f_2 = f[INDEX2(i,2,numComp)];
1772 const double f_3 = f[INDEX2(i,3,numComp)];
1773 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1774 } // end of component loop i
1775 } // end of k1 loop
1776 } // end of k2 loop
1777 }
1778
1779 if (m_faceOffset[5] > -1) {
1780 #pragma omp for nowait
1781 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1782 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1783 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1784 for (index_t i=0; i < numComp; ++i) {
1785 const double f_0 = f[INDEX2(i,0,numComp)];
1786 const double f_1 = f[INDEX2(i,1,numComp)];
1787 const double f_2 = f[INDEX2(i,2,numComp)];
1788 const double f_3 = f[INDEX2(i,3,numComp)];
1789 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1790 } // end of component loop i
1791 } // end of k1 loop
1792 } // end of k2 loop
1793 }
1794
1795 #pragma omp critical
1796 for (index_t i=0; i<numComp; i++)
1797 integrals[i]+=int_local[i];
1798 } // end of parallel section
1799
1800 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1801 const double w_0 = m_dx[1]*m_dx[2];
1802 const double w_1 = m_dx[0]*m_dx[2];