/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4712 - (show annotations)
Wed Feb 26 04:08:41 2014 UTC (5 years, 1 month ago) by sshaw
Original Path: trunk/ripley/src/Brick.cpp
File size: 157468 byte(s)
adding skeleton of fast Lame assemblers
adjusted automatic ripley domain subdivision ( should solve issue #94 )
more tab->space conversions

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <ripley/LameAssembler3D.h>
23 #include <ripley/domainhelpers.h>
24 #include <boost/scoped_array.hpp>
25
26 #ifdef USE_NETCDF
27 #include <netcdfcpp.h>
28 #endif
29
30 #if USE_SILO
31 #include <silo.h>
32 #ifdef ESYS_MPI
33 #include <pmpio.h>
34 #endif
35 #endif
36
37 #include <iomanip>
38
39 #include "esysUtils/EsysRandom.h"
40 #include "blocktools.h"
41
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 int indexOfMax(int a, int b, int c) {
49 if (a > b) {
50 if (c > a) {
51 return 2;
52 }
53 return 0;
54 } else if (b > c) {
55 return 1;
56 }
57 return 2;
58 }
59
60 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 double x1, double y1, double z1, int d0, int d1, int d2,
62 const std::vector<double>& points, const std::vector<int>& tags,
63 const simap_t& tagnamestonums) :
64 RipleyDomain(3)
65 {
66 // ignore subdivision parameters for serial run
67 if (m_mpiInfo->size == 1) {
68 d0=1;
69 d1=1;
70 d2=1;
71 }
72 bool warn=false;
73
74 std::vector<int> factors;
75 int ranks = m_mpiInfo->size;
76 int epr[3] = {n0,n1,n2};
77 int d[3] = {d0,d1,d2};
78 if (d0<=0 || d1<=0 || d2<=0) {
79 for (int i = 0; i < 3; i++) {
80 if (d[i] < 1) {
81 d[i] = 1;
82 continue;
83 }
84 epr[i] = -1; // can no longer be max
85 if (ranks % d[i] != 0) {
86 throw RipleyException("Invalid number of spatial subdivisions");
87 }
88 //remove
89 ranks /= d[i];
90 }
91 factorise(factors, ranks);
92 if (factors.size() != 0) {
93 warn = true;
94 }
95 }
96 while (factors.size() > 0) {
97 int i = indexOfMax(epr[0],epr[1],epr[2]);
98 int f = factors.back();
99 factors.pop_back();
100 d[i] *= f;
101 epr[i] /= f;
102 }
103 d0 = d[0]; d1 = d[1]; d2 = d[2];
104
105 // ensure number of subdivisions is valid and nodes can be distributed
106 // among number of ranks
107 if (d0*d1*d2 != m_mpiInfo->size){
108 throw RipleyException("Invalid number of spatial subdivisions");
109 }
110 if (warn) {
111 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113 }
114
115 double l0 = x1-x0;
116 double l1 = y1-y0;
117 double l2 = z1-z0;
118 m_dx[0] = l0/n0;
119 m_dx[1] = l1/n1;
120 m_dx[2] = l2/n2;
121
122 if ((n0+1)%d0 > 0) {
123 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 l0=m_dx[0]*n0;
125 cout << "Warning: Adjusted number of elements and length. N0="
126 << n0 << ", l0=" << l0 << endl;
127 }
128 if ((n1+1)%d1 > 0) {
129 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 l1=m_dx[1]*n1;
131 cout << "Warning: Adjusted number of elements and length. N1="
132 << n1 << ", l1=" << l1 << endl;
133 }
134 if ((n2+1)%d2 > 0) {
135 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 l2=m_dx[2]*n2;
137 cout << "Warning: Adjusted number of elements and length. N2="
138 << n2 << ", l2=" << l2 << endl;
139 }
140
141 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 throw RipleyException("Too few elements for the number of ranks");
143
144 m_gNE[0] = n0;
145 m_gNE[1] = n1;
146 m_gNE[2] = n2;
147 m_origin[0] = x0;
148 m_origin[1] = y0;
149 m_origin[2] = z0;
150 m_length[0] = l0;
151 m_length[1] = l1;
152 m_length[2] = l2;
153 m_NX[0] = d0;
154 m_NX[1] = d1;
155 m_NX[2] = d2;
156
157 // local number of elements (including overlap)
158 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160 m_NE[0]++;
161 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162 m_ownNE[0]--;
163
164 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166 m_NE[1]++;
167 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168 m_ownNE[1]--;
169
170 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172 m_NE[2]++;
173 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174 m_ownNE[2]--;
175
176 // local number of nodes
177 m_NN[0] = m_NE[0]+1;
178 m_NN[1] = m_NE[1]+1;
179 m_NN[2] = m_NE[2]+1;
180
181 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183 if (m_offset[0] > 0)
184 m_offset[0]--;
185 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186 if (m_offset[1] > 0)
187 m_offset[1]--;
188 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189 if (m_offset[2] > 0)
190 m_offset[2]--;
191
192 populateSampleIds();
193 createPattern();
194
195 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197 i != tagnamestonums.end(); i++) {
198 setTagMap(i->first, i->second);
199 }
200 addPoints(tags.size(), &points[0], &tags[0]);
201 }
202
203
204 Brick::~Brick()
205 {
206 Paso_SystemMatrixPattern_free(m_pattern);
207 Paso_Connector_free(m_connector);
208 delete assembler;
209 }
210
211 string Brick::getDescription() const
212 {
213 return "ripley::Brick";
214 }
215
216 bool Brick::operator==(const AbstractDomain& other) const
217 {
218 const Brick* o=dynamic_cast<const Brick*>(&other);
219 if (o) {
220 return (RipleyDomain::operator==(other) &&
221 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
222 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
223 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
224 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
225 }
226
227 return false;
228 }
229
230 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
231 const ReaderParameters& params) const
232 {
233 #ifdef USE_NETCDF
234 // check destination function space
235 int myN0, myN1, myN2;
236 if (out.getFunctionSpace().getTypeCode() == Nodes) {
237 myN0 = m_NN[0];
238 myN1 = m_NN[1];
239 myN2 = m_NN[2];
240 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
241 out.getFunctionSpace().getTypeCode() == ReducedElements) {
242 myN0 = m_NE[0];
243 myN1 = m_NE[1];
244 myN2 = m_NE[2];
245 } else
246 throw RipleyException("readNcGrid(): invalid function space for output data object");
247
248 if (params.first.size() != 3)
249 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
250
251 if (params.numValues.size() != 3)
252 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
253
254 if (params.multiplier.size() != 3)
255 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
256 for (size_t i=0; i<params.multiplier.size(); i++)
257 if (params.multiplier[i]<1)
258 throw RipleyException("readNcGrid(): all multipliers must be positive");
259
260 // check file existence and size
261 NcFile f(filename.c_str(), NcFile::ReadOnly);
262 if (!f.is_valid())
263 throw RipleyException("readNcGrid(): cannot open file");
264
265 NcVar* var = f.get_var(varname.c_str());
266 if (!var)
267 throw RipleyException("readNcGrid(): invalid variable name");
268
269 // TODO: rank>0 data support
270 const int numComp = out.getDataPointSize();
271 if (numComp > 1)
272 throw RipleyException("readNcGrid(): only scalar data supported");
273
274 const int dims = var->num_dims();
275 boost::scoped_array<long> edges(var->edges());
276
277 // is this a slice of the data object (dims!=3)?
278 // note the expected ordering of edges (as in numpy: z,y,x)
279 if ( (dims==3 && (params.numValues[2] > edges[0] ||
280 params.numValues[1] > edges[1] ||
281 params.numValues[0] > edges[2]))
282 || (dims==2 && params.numValues[2]>1)
283 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
284 throw RipleyException("readNcGrid(): not enough data in file");
285 }
286
287 // check if this rank contributes anything
288 if (params.first[0] >= m_offset[0]+myN0 ||
289 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
290 params.first[1] >= m_offset[1]+myN1 ||
291 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
292 params.first[2] >= m_offset[2]+myN2 ||
293 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
294 return;
295 }
296
297 // now determine how much this rank has to write
298
299 // first coordinates in data object to write to
300 const int first0 = max(0, params.first[0]-m_offset[0]);
301 const int first1 = max(0, params.first[1]-m_offset[1]);
302 const int first2 = max(0, params.first[2]-m_offset[2]);
303 // indices to first value in file (not accounting for reverse yet)
304 int idx0 = max(0, m_offset[0]-params.first[0]);
305 int idx1 = max(0, m_offset[1]-params.first[1]);
306 int idx2 = max(0, m_offset[2]-params.first[2]);
307 // number of values to read
308 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
309 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
310 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
311
312 // make sure we read the right block if going backwards through file
313 if (params.reverse[0])
314 idx0 = edges[dims-1]-num0-idx0;
315 if (dims>1 && params.reverse[1])
316 idx1 = edges[dims-2]-num1-idx1;
317 if (dims>2 && params.reverse[2])
318 idx2 = edges[dims-3]-num2-idx2;
319
320
321 vector<double> values(num0*num1*num2);
322 if (dims==3) {
323 var->set_cur(idx2, idx1, idx0);
324 var->get(&values[0], num2, num1, num0);
325 } else if (dims==2) {
326 var->set_cur(idx1, idx0);
327 var->get(&values[0], num1, num0);
328 } else {
329 var->set_cur(idx0);
330 var->get(&values[0], num0);
331 }
332
333 const int dpp = out.getNumDataPointsPerSample();
334 out.requireWrite();
335
336 // helpers for reversing
337 const int x0 = (params.reverse[0] ? num0-1 : 0);
338 const int x_mult = (params.reverse[0] ? -1 : 1);
339 const int y0 = (params.reverse[1] ? num1-1 : 0);
340 const int y_mult = (params.reverse[1] ? -1 : 1);
341 const int z0 = (params.reverse[2] ? num2-1 : 0);
342 const int z_mult = (params.reverse[2] ? -1 : 1);
343
344 for (index_t z=0; z<num2; z++) {
345 for (index_t y=0; y<num1; y++) {
346 #pragma omp parallel for
347 for (index_t x=0; x<num0; x++) {
348 const int baseIndex = first0+x*params.multiplier[0]
349 +(first1+y*params.multiplier[1])*myN0
350 +(first2+z*params.multiplier[2])*myN0*myN1;
351 const int srcIndex=(z0+z_mult*z)*num1*num0
352 +(y0+y_mult*y)*num0
353 +(x0+x_mult*x);
354 if (!isnan(values[srcIndex])) {
355 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
356 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
357 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
358 const int dataIndex = baseIndex+m0
359 +m1*myN0
360 +m2*myN0*myN1;
361 double* dest = out.getSampleDataRW(dataIndex);
362 for (index_t q=0; q<dpp; q++) {
363 *dest++ = values[srcIndex];
364 }
365 }
366 }
367 }
368 }
369 }
370 }
371 }
372 #else
373 throw RipleyException("readNcGrid(): not compiled with netCDF support");
374 #endif
375 }
376
377 void Brick::readBinaryGrid(escript::Data& out, string filename,
378 const ReaderParameters& params) const
379 {
380 // the mapping is not universally correct but should work on our
381 // supported platforms
382 switch (params.dataType) {
383 case DATATYPE_INT32:
384 readBinaryGridImpl<int>(out, filename, params);
385 break;
386 case DATATYPE_FLOAT32:
387 readBinaryGridImpl<float>(out, filename, params);
388 break;
389 case DATATYPE_FLOAT64:
390 readBinaryGridImpl<double>(out, filename, params);
391 break;
392 default:
393 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
394 }
395 }
396
397 template<typename ValueType>
398 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
399 const ReaderParameters& params) const
400 {
401 // check destination function space
402 int myN0, myN1, myN2;
403 if (out.getFunctionSpace().getTypeCode() == Nodes) {
404 myN0 = m_NN[0];
405 myN1 = m_NN[1];
406 myN2 = m_NN[2];
407 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
408 out.getFunctionSpace().getTypeCode() == ReducedElements) {
409 myN0 = m_NE[0];
410 myN1 = m_NE[1];
411 myN2 = m_NE[2];
412 } else
413 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
414
415 if (params.first.size() != 3)
416 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
417
418 if (params.numValues.size() != 3)
419 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
420
421 if (params.multiplier.size() != 3)
422 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
423 for (size_t i=0; i<params.multiplier.size(); i++)
424 if (params.multiplier[i]<1)
425 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
426
427 // check file existence and size
428 ifstream f(filename.c_str(), ifstream::binary);
429 if (f.fail()) {
430 throw RipleyException("readBinaryGrid(): cannot open file");
431 }
432 f.seekg(0, ios::end);
433 const int numComp = out.getDataPointSize();
434 const int filesize = f.tellg();
435 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
436 if (filesize < reqsize) {
437 f.close();
438 throw RipleyException("readBinaryGrid(): not enough data in file");
439 }
440
441 // check if this rank contributes anything
442 if (params.first[0] >= m_offset[0]+myN0 ||
443 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
444 params.first[1] >= m_offset[1]+myN1 ||
445 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
446 params.first[2] >= m_offset[2]+myN2 ||
447 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
448 f.close();
449 return;
450 }
451
452 // now determine how much this rank has to write
453
454 // first coordinates in data object to write to
455 const int first0 = max(0, params.first[0]-m_offset[0]);
456 const int first1 = max(0, params.first[1]-m_offset[1]);
457 const int first2 = max(0, params.first[2]-m_offset[2]);
458 // indices to first value in file
459 const int idx0 = max(0, m_offset[0]-params.first[0]);
460 const int idx1 = max(0, m_offset[1]-params.first[1]);
461 const int idx2 = max(0, m_offset[2]-params.first[2]);
462 // number of values to read
463 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
464 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
465 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
466
467 out.requireWrite();
468 vector<ValueType> values(num0*numComp);
469 const int dpp = out.getNumDataPointsPerSample();
470
471 for (int z=0; z<num2; z++) {
472 for (int y=0; y<num1; y++) {
473 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
474 +(idx2+z)*params.numValues[0]*params.numValues[1]);
475 f.seekg(fileofs*sizeof(ValueType));
476 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
477
478 for (int x=0; x<num0; x++) {
479 const int baseIndex = first0+x*params.multiplier[0]
480 +(first1+y*params.multiplier[1])*myN0
481 +(first2+z*params.multiplier[2])*myN0*myN1;
482 for (int m2=0; m2<params.multiplier[2]; m2++) {
483 for (int m1=0; m1<params.multiplier[1]; m1++) {
484 for (int m0=0; m0<params.multiplier[0]; m0++) {
485 const int dataIndex = baseIndex+m0
486 +m1*myN0
487 +m2*myN0*myN1;
488 double* dest = out.getSampleDataRW(dataIndex);
489 for (int c=0; c<numComp; c++) {
490 ValueType val = values[x*numComp+c];
491
492 if (params.byteOrder != BYTEORDER_NATIVE) {
493 char* cval = reinterpret_cast<char*>(&val);
494 // this will alter val!!
495 byte_swap32(cval);
496 }
497 if (!std::isnan(val)) {
498 for (int q=0; q<dpp; q++) {
499 *dest++ = static_cast<double>(val);
500 }
501 }
502 }
503 }
504 }
505 }
506 }
507 }
508 }
509
510 f.close();
511 }
512
513 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
514 int byteOrder, int dataType) const
515 {
516 // the mapping is not universally correct but should work on our
517 // supported platforms
518 switch (dataType) {
519 case DATATYPE_INT32:
520 writeBinaryGridImpl<int>(in, filename, byteOrder);
521 break;
522 case DATATYPE_FLOAT32:
523 writeBinaryGridImpl<float>(in, filename, byteOrder);
524 break;
525 case DATATYPE_FLOAT64:
526 writeBinaryGridImpl<double>(in, filename, byteOrder);
527 break;
528 default:
529 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
530 }
531 }
532
533 template<typename ValueType>
534 void Brick::writeBinaryGridImpl(const escript::Data& in,
535 const string& filename, int byteOrder) const
536 {
537 // check function space and determine number of points
538 int myN0, myN1, myN2;
539 int totalN0, totalN1, totalN2;
540 if (in.getFunctionSpace().getTypeCode() == Nodes) {
541 myN0 = m_NN[0];
542 myN1 = m_NN[1];
543 myN2 = m_NN[2];
544 totalN0 = m_gNE[0]+1;
545 totalN1 = m_gNE[1]+1;
546 totalN2 = m_gNE[2]+1;
547 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
548 in.getFunctionSpace().getTypeCode() == ReducedElements) {
549 myN0 = m_NE[0];
550 myN1 = m_NE[1];
551 myN2 = m_NE[2];
552 totalN0 = m_gNE[0];
553 totalN1 = m_gNE[1];
554 totalN2 = m_gNE[2];
555 } else
556 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
557
558 const int numComp = in.getDataPointSize();
559 const int dpp = in.getNumDataPointsPerSample();
560 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
561
562 if (numComp > 1 || dpp > 1)
563 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
564
565 // from here on we know that each sample consists of one value
566 FileWriter fw;
567 fw.openFile(filename, fileSize);
568 MPIBarrier();
569
570 for (index_t z=0; z<myN2; z++) {
571 for (index_t y=0; y<myN1; y++) {
572 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
573 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
574 ostringstream oss;
575
576 for (index_t x=0; x<myN0; x++) {
577 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
578 ValueType fvalue = static_cast<ValueType>(*sample);
579 if (byteOrder == BYTEORDER_NATIVE) {
580 oss.write((char*)&fvalue, sizeof(fvalue));
581 } else {
582 char* value = reinterpret_cast<char*>(&fvalue);
583 oss.write(byte_swap32(value), sizeof(fvalue));
584 }
585 }
586 fw.writeAt(oss, fileofs);
587 }
588 }
589 fw.close();
590 }
591
592 void Brick::dump(const string& fileName) const
593 {
594 #if USE_SILO
595 string fn(fileName);
596 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
597 fn+=".silo";
598 }
599
600 int driver=DB_HDF5;
601 string siloPath;
602 DBfile* dbfile = NULL;
603
604 #ifdef ESYS_MPI
605 PMPIO_baton_t* baton = NULL;
606 const int NUM_SILO_FILES = 1;
607 const char* blockDirFmt = "/block%04d";
608 #endif
609
610 if (m_mpiInfo->size > 1) {
611 #ifdef ESYS_MPI
612 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
613 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
614 PMPIO_DefaultClose, (void*)&driver);
615 // try the fallback driver in case of error
616 if (!baton && driver != DB_PDB) {
617 driver = DB_PDB;
618 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
619 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
620 PMPIO_DefaultClose, (void*)&driver);
621 }
622 if (baton) {
623 char str[64];
624 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
625 siloPath = str;
626 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
627 }
628 #endif
629 } else {
630 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
631 getDescription().c_str(), driver);
632 // try the fallback driver in case of error
633 if (!dbfile && driver != DB_PDB) {
634 driver = DB_PDB;
635 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
636 getDescription().c_str(), driver);
637 }
638 }
639
640 if (!dbfile)
641 throw RipleyException("dump: Could not create Silo file");
642
643 /*
644 if (driver==DB_HDF5) {
645 // gzip level 1 already provides good compression with minimal
646 // performance penalty. Some tests showed that gzip levels >3 performed
647 // rather badly on escript data both in terms of time and space
648 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
649 }
650 */
651
652 boost::scoped_ptr<double> x(new double[m_NN[0]]);
653 boost::scoped_ptr<double> y(new double[m_NN[1]]);
654 boost::scoped_ptr<double> z(new double[m_NN[2]]);
655 double* coords[3] = { x.get(), y.get(), z.get() };
656 #pragma omp parallel
657 {
658 #pragma omp for
659 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
660 coords[0][i0]=getLocalCoordinate(i0, 0);
661 }
662 #pragma omp for
663 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
664 coords[1][i1]=getLocalCoordinate(i1, 1);
665 }
666 #pragma omp for
667 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
668 coords[2][i2]=getLocalCoordinate(i2, 2);
669 }
670 }
671 int* dims = const_cast<int*>(getNumNodesPerDim());
672
673 // write mesh
674 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
675 DB_COLLINEAR, NULL);
676
677 // write node ids
678 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
679 NULL, 0, DB_INT, DB_NODECENT, NULL);
680
681 // write element ids
682 dims = const_cast<int*>(getNumElementsPerDim());
683 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
684 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
685
686 // rank 0 writes multimesh and multivar
687 if (m_mpiInfo->rank == 0) {
688 vector<string> tempstrings;
689 vector<char*> names;
690 for (dim_t i=0; i<m_mpiInfo->size; i++) {
691 stringstream path;
692 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
693 tempstrings.push_back(path.str());
694 names.push_back((char*)tempstrings.back().c_str());
695 }
696 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
697 DBSetDir(dbfile, "/");
698 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
699 &types[0], NULL);
700 tempstrings.clear();
701 names.clear();
702 for (dim_t i=0; i<m_mpiInfo->size; i++) {
703 stringstream path;
704 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
705 tempstrings.push_back(path.str());
706 names.push_back((char*)tempstrings.back().c_str());
707 }
708 types.assign(m_mpiInfo->size, DB_QUADVAR);
709 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
710 &types[0], NULL);
711 tempstrings.clear();
712 names.clear();
713 for (dim_t i=0; i<m_mpiInfo->size; i++) {
714 stringstream path;
715 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
716 tempstrings.push_back(path.str());
717 names.push_back((char*)tempstrings.back().c_str());
718 }
719 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
720 &types[0], NULL);
721 }
722
723 if (m_mpiInfo->size > 1) {
724 #ifdef ESYS_MPI
725 PMPIO_HandOffBaton(baton, dbfile);
726 PMPIO_Finish(baton);
727 #endif
728 } else {
729 DBClose(dbfile);
730 }
731
732 #else // USE_SILO
733 throw RipleyException("dump: no Silo support");
734 #endif
735 }
736
737 const int* Brick::borrowSampleReferenceIDs(int fsType) const
738 {
739 switch (fsType) {
740 case Nodes:
741 case ReducedNodes: //FIXME: reduced
742 return &m_nodeId[0];
743 case DegreesOfFreedom:
744 case ReducedDegreesOfFreedom: //FIXME: reduced
745 return &m_dofId[0];
746 case Elements:
747 case ReducedElements:
748 return &m_elementId[0];
749 case FaceElements:
750 case ReducedFaceElements:
751 return &m_faceId[0];
752 case Points:
753 return &m_diracPointNodeIDs[0];
754 default:
755 break;
756 }
757
758 stringstream msg;
759 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
760 throw RipleyException(msg.str());
761 }
762
763 bool Brick::ownSample(int fsType, index_t id) const
764 {
765 if (getMPISize()==1)
766 return true;
767
768 switch (fsType) {
769 case Nodes:
770 case ReducedNodes: //FIXME: reduced
771 return (m_dofMap[id] < getNumDOF());
772 case DegreesOfFreedom:
773 case ReducedDegreesOfFreedom:
774 return true;
775 case Elements:
776 case ReducedElements:
777 {
778 // check ownership of element's _last_ node
779 const index_t x=id%m_NE[0] + 1;
780 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
781 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
782 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
783 }
784 case FaceElements:
785 case ReducedFaceElements:
786 {
787 // check ownership of face element's last node
788 dim_t n=0;
789 for (size_t i=0; i<6; i++) {
790 n+=m_faceCount[i];
791 if (id<n) {
792 const index_t j=id-n+m_faceCount[i];
793 if (i>=4) { // front or back
794 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
795 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
796 } else if (i>=2) { // bottom or top
797 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
798 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
799 } else { // left or right
800 const index_t first=(i==0 ? 0 : m_NN[0]-1);
801 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
802 }
803 }
804 }
805 return false;
806 }
807 default:
808 break;
809 }
810
811 stringstream msg;
812 msg << "ownSample: invalid function space type " << fsType;
813 throw RipleyException(msg.str());
814 }
815
816 void Brick::setToNormal(escript::Data& out) const
817 {
818 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
819 out.requireWrite();
820 #pragma omp parallel
821 {
822 if (m_faceOffset[0] > -1) {
823 #pragma omp for nowait
824 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
825 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
826 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
827 // set vector at four quadrature points
828 *o++ = -1.; *o++ = 0.; *o++ = 0.;
829 *o++ = -1.; *o++ = 0.; *o++ = 0.;
830 *o++ = -1.; *o++ = 0.; *o++ = 0.;
831 *o++ = -1.; *o++ = 0.; *o = 0.;
832 }
833 }
834 }
835
836 if (m_faceOffset[1] > -1) {
837 #pragma omp for nowait
838 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
839 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
840 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
841 // set vector at four quadrature points
842 *o++ = 1.; *o++ = 0.; *o++ = 0.;
843 *o++ = 1.; *o++ = 0.; *o++ = 0.;
844 *o++ = 1.; *o++ = 0.; *o++ = 0.;
845 *o++ = 1.; *o++ = 0.; *o = 0.;
846 }
847 }
848 }
849
850 if (m_faceOffset[2] > -1) {
851 #pragma omp for nowait
852 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
853 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
854 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
855 // set vector at four quadrature points
856 *o++ = 0.; *o++ = -1.; *o++ = 0.;
857 *o++ = 0.; *o++ = -1.; *o++ = 0.;
858 *o++ = 0.; *o++ = -1.; *o++ = 0.;
859 *o++ = 0.; *o++ = -1.; *o = 0.;
860 }
861 }
862 }
863
864 if (m_faceOffset[3] > -1) {
865 #pragma omp for nowait
866 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
867 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
868 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
869 // set vector at four quadrature points
870 *o++ = 0.; *o++ = 1.; *o++ = 0.;
871 *o++ = 0.; *o++ = 1.; *o++ = 0.;
872 *o++ = 0.; *o++ = 1.; *o++ = 0.;
873 *o++ = 0.; *o++ = 1.; *o = 0.;
874 }
875 }
876 }
877
878 if (m_faceOffset[4] > -1) {
879 #pragma omp for nowait
880 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
881 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
882 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
883 // set vector at four quadrature points
884 *o++ = 0.; *o++ = 0.; *o++ = -1.;
885 *o++ = 0.; *o++ = 0.; *o++ = -1.;
886 *o++ = 0.; *o++ = 0.; *o++ = -1.;
887 *o++ = 0.; *o++ = 0.; *o = -1.;
888 }
889 }
890 }
891
892 if (m_faceOffset[5] > -1) {
893 #pragma omp for nowait
894 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
895 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
896 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
897 // set vector at four quadrature points
898 *o++ = 0.; *o++ = 0.; *o++ = 1.;
899 *o++ = 0.; *o++ = 0.; *o++ = 1.;
900 *o++ = 0.; *o++ = 0.; *o++ = 1.;
901 *o++ = 0.; *o++ = 0.; *o = 1.;
902 }
903 }
904 }
905 } // end of parallel section
906 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
907 out.requireWrite();
908 #pragma omp parallel
909 {
910 if (m_faceOffset[0] > -1) {
911 #pragma omp for nowait
912 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
913 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
914 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
915 *o++ = -1.;
916 *o++ = 0.;
917 *o = 0.;
918 }
919 }
920 }
921
922 if (m_faceOffset[1] > -1) {
923 #pragma omp for nowait
924 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
925 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
926 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
927 *o++ = 1.;
928 *o++ = 0.;
929 *o = 0.;
930 }
931 }
932 }
933
934 if (m_faceOffset[2] > -1) {
935 #pragma omp for nowait
936 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
937 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
938 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
939 *o++ = 0.;
940 *o++ = -1.;
941 *o = 0.;
942 }
943 }
944 }
945
946 if (m_faceOffset[3] > -1) {
947 #pragma omp for nowait
948 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
949 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
950 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
951 *o++ = 0.;
952 *o++ = 1.;
953 *o = 0.;
954 }
955 }
956 }
957
958 if (m_faceOffset[4] > -1) {
959 #pragma omp for nowait
960 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
961 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
962 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
963 *o++ = 0.;
964 *o++ = 0.;
965 *o = -1.;
966 }
967 }
968 }
969
970 if (m_faceOffset[5] > -1) {
971 #pragma omp for nowait
972 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
973 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
974 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
975 *o++ = 0.;
976 *o++ = 0.;
977 *o = 1.;
978 }
979 }
980 }
981 } // end of parallel section
982
983 } else {
984 stringstream msg;
985 msg << "setToNormal: invalid function space type "
986 << out.getFunctionSpace().getTypeCode();
987 throw RipleyException(msg.str());
988 }
989 }
990
991 void Brick::setToSize(escript::Data& out) const
992 {
993 if (out.getFunctionSpace().getTypeCode() == Elements
994 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
995 out.requireWrite();
996 const dim_t numQuad=out.getNumDataPointsPerSample();
997 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
998 #pragma omp parallel for
999 for (index_t k = 0; k < getNumElements(); ++k) {
1000 double* o = out.getSampleDataRW(k);
1001 fill(o, o+numQuad, size);
1002 }
1003 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1004 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1005 out.requireWrite();
1006 const dim_t numQuad=out.getNumDataPointsPerSample();
1007 #pragma omp parallel
1008 {
1009 if (m_faceOffset[0] > -1) {
1010 const double size=min(m_dx[1],m_dx[2]);
1011 #pragma omp for nowait
1012 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1013 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1014 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1015 fill(o, o+numQuad, size);
1016 }
1017 }
1018 }
1019
1020 if (m_faceOffset[1] > -1) {
1021 const double size=min(m_dx[1],m_dx[2]);
1022 #pragma omp for nowait
1023 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1024 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1025 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1026 fill(o, o+numQuad, size);
1027 }
1028 }
1029 }
1030
1031 if (m_faceOffset[2] > -1) {
1032 const double size=min(m_dx[0],m_dx[2]);
1033 #pragma omp for nowait
1034 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1035 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1036 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1037 fill(o, o+numQuad, size);
1038 }
1039 }
1040 }
1041
1042 if (m_faceOffset[3] > -1) {
1043 const double size=min(m_dx[0],m_dx[2]);
1044 #pragma omp for nowait
1045 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1046 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1047 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1048 fill(o, o+numQuad, size);
1049 }
1050 }
1051 }
1052
1053 if (m_faceOffset[4] > -1) {
1054 const double size=min(m_dx[0],m_dx[1]);
1055 #pragma omp for nowait
1056 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1057 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1058 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1059 fill(o, o+numQuad, size);
1060 }
1061 }
1062 }
1063
1064 if (m_faceOffset[5] > -1) {
1065 const double size=min(m_dx[0],m_dx[1]);
1066 #pragma omp for nowait
1067 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1068 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1069 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1070 fill(o, o+numQuad, size);
1071 }
1072 }
1073 }
1074 } // end of parallel section
1075
1076 } else {
1077 stringstream msg;
1078 msg << "setToSize: invalid function space type "
1079 << out.getFunctionSpace().getTypeCode();
1080 throw RipleyException(msg.str());
1081 }
1082 }
1083
1084 void Brick::Print_Mesh_Info(const bool full) const
1085 {
1086 RipleyDomain::Print_Mesh_Info(full);
1087 if (full) {
1088 cout << " Id Coordinates" << endl;
1089 cout.precision(15);
1090 cout.setf(ios::scientific, ios::floatfield);
1091 for (index_t i=0; i < getNumNodes(); i++) {
1092 cout << " " << setw(5) << m_nodeId[i]
1093 << " " << getLocalCoordinate(i%m_NN[0], 0)
1094 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1095 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1096 }
1097 }
1098 }
1099
1100
1101 //protected
1102 void Brick::assembleCoordinates(escript::Data& arg) const
1103 {
1104 escriptDataC x = arg.getDataC();
1105 int numDim = m_numDim;
1106 if (!isDataPointShapeEqual(&x, 1, &numDim))
1107 throw RipleyException("setToX: Invalid Data object shape");
1108 if (!numSamplesEqual(&x, 1, getNumNodes()))
1109 throw RipleyException("setToX: Illegal number of samples in Data object");
1110
1111 arg.requireWrite();
1112 #pragma omp parallel for
1113 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1114 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1115 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1116 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1117 point[0] = getLocalCoordinate(i0, 0);
1118 point[1] = getLocalCoordinate(i1, 1);
1119 point[2] = getLocalCoordinate(i2, 2);
1120 }
1121 }
1122 }
1123 }
1124
1125 //protected
1126 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1127 {
1128 const dim_t numComp = in.getDataPointSize();
1129 const double C0 = .044658198738520451079;
1130 const double C1 = .16666666666666666667;
1131 const double C2 = .21132486540518711775;
1132 const double C3 = .25;
1133 const double C4 = .5;
1134 const double C5 = .62200846792814621559;
1135 const double C6 = .78867513459481288225;
1136
1137 if (out.getFunctionSpace().getTypeCode() == Elements) {
1138 out.requireWrite();
1139 #pragma omp parallel
1140 {
1141 vector<double> f_000(numComp);
1142 vector<double> f_001(numComp);
1143 vector<double> f_010(numComp);
1144 vector<double> f_011(numComp);
1145 vector<double> f_100(numComp);
1146 vector<double> f_101(numComp);
1147 vector<double> f_110(numComp);
1148 vector<double> f_111(numComp);
1149 #pragma omp for
1150 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1151 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1152 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1153 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1154 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1155 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1156 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1157 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1158 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1159 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1160 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1161 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1162 for (index_t i=0; i < numComp; ++i) {
1163 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1164 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1165 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1166 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1167 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1168 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1169 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1170 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1171 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1172 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1173 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1174 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1175 o[INDEX3(i,0,0,numComp,3)] = V0;
1176 o[INDEX3(i,1,0,numComp,3)] = V4;
1177 o[INDEX3(i,2,0,numComp,3)] = V8;
1178 o[INDEX3(i,0,1,numComp,3)] = V0;
1179 o[INDEX3(i,1,1,numComp,3)] = V5;
1180 o[INDEX3(i,2,1,numComp,3)] = V9;
1181 o[INDEX3(i,0,2,numComp,3)] = V1;
1182 o[INDEX3(i,1,2,numComp,3)] = V4;
1183 o[INDEX3(i,2,2,numComp,3)] = V10;
1184 o[INDEX3(i,0,3,numComp,3)] = V1;
1185 o[INDEX3(i,1,3,numComp,3)] = V5;
1186 o[INDEX3(i,2,3,numComp,3)] = V11;
1187 o[INDEX3(i,0,4,numComp,3)] = V2;
1188 o[INDEX3(i,1,4,numComp,3)] = V6;
1189 o[INDEX3(i,2,4,numComp,3)] = V8;
1190 o[INDEX3(i,0,5,numComp,3)] = V2;
1191 o[INDEX3(i,1,5,numComp,3)] = V7;
1192 o[INDEX3(i,2,5,numComp,3)] = V9;
1193 o[INDEX3(i,0,6,numComp,3)] = V3;
1194 o[INDEX3(i,1,6,numComp,3)] = V6;
1195 o[INDEX3(i,2,6,numComp,3)] = V10;
1196 o[INDEX3(i,0,7,numComp,3)] = V3;
1197 o[INDEX3(i,1,7,numComp,3)] = V7;
1198 o[INDEX3(i,2,7,numComp,3)] = V11;
1199 } // end of component loop i
1200 } // end of k0 loop
1201 } // end of k1 loop
1202 } // end of k2 loop
1203 } // end of parallel section
1204 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1205 out.requireWrite();
1206 #pragma omp parallel
1207 {
1208 vector<double> f_000(numComp);
1209 vector<double> f_001(numComp);
1210 vector<double> f_010(numComp);
1211 vector<double> f_011(numComp);
1212 vector<double> f_100(numComp);
1213 vector<double> f_101(numComp);
1214 vector<double> f_110(numComp);
1215 vector<double> f_111(numComp);
1216 #pragma omp for
1217 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1218 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1219 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1220 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1221 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1222 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1223 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1224 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1225 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1226 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1227 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1228 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1229 for (index_t i=0; i < numComp; ++i) {
1230 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1231 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1232 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1233 } // end of component loop i
1234 } // end of k0 loop
1235 } // end of k1 loop
1236 } // end of k2 loop
1237 } // end of parallel section
1238 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1239 out.requireWrite();
1240 #pragma omp parallel
1241 {
1242 vector<double> f_000(numComp);
1243 vector<double> f_001(numComp);
1244 vector<double> f_010(numComp);
1245 vector<double> f_011(numComp);
1246 vector<double> f_100(numComp);
1247 vector<double> f_101(numComp);
1248 vector<double> f_110(numComp);
1249 vector<double> f_111(numComp);
1250 if (m_faceOffset[0] > -1) {
1251 #pragma omp for nowait
1252 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1253 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1254 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1255 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1263 for (index_t i=0; i < numComp; ++i) {
1264 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1265 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1266 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1267 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1268 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1269 o[INDEX3(i,1,0,numComp,3)] = V0;
1270 o[INDEX3(i,2,0,numComp,3)] = V2;
1271 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1272 o[INDEX3(i,1,1,numComp,3)] = V0;
1273 o[INDEX3(i,2,1,numComp,3)] = V3;
1274 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1275 o[INDEX3(i,1,2,numComp,3)] = V1;
1276 o[INDEX3(i,2,2,numComp,3)] = V2;
1277 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1278 o[INDEX3(i,1,3,numComp,3)] = V1;
1279 o[INDEX3(i,2,3,numComp,3)] = V3;
1280 } // end of component loop i
1281 } // end of k1 loop
1282 } // end of k2 loop
1283 } // end of face 0
1284 if (m_faceOffset[1] > -1) {
1285 #pragma omp for nowait
1286 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1287 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1288 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1289 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1297 for (index_t i=0; i < numComp; ++i) {
1298 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1299 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1300 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1301 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1302 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1303 o[INDEX3(i,1,0,numComp,3)] = V0;
1304 o[INDEX3(i,2,0,numComp,3)] = V2;
1305 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 o[INDEX3(i,1,1,numComp,3)] = V0;
1307 o[INDEX3(i,2,1,numComp,3)] = V3;
1308 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1309 o[INDEX3(i,1,2,numComp,3)] = V1;
1310 o[INDEX3(i,2,2,numComp,3)] = V2;
1311 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1312 o[INDEX3(i,1,3,numComp,3)] = V1;
1313 o[INDEX3(i,2,3,numComp,3)] = V3;
1314 } // end of component loop i
1315 } // end of k1 loop
1316 } // end of k2 loop
1317 } // end of face 1
1318 if (m_faceOffset[2] > -1) {
1319 #pragma omp for nowait
1320 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1321 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1322 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1323 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1331 for (index_t i=0; i < numComp; ++i) {
1332 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1333 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1334 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1335 o[INDEX3(i,0,0,numComp,3)] = V0;
1336 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1337 o[INDEX3(i,2,0,numComp,3)] = V1;
1338 o[INDEX3(i,0,1,numComp,3)] = V0;
1339 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1340 o[INDEX3(i,2,1,numComp,3)] = V2;
1341 o[INDEX3(i,0,2,numComp,3)] = V0;
1342 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1343 o[INDEX3(i,2,2,numComp,3)] = V1;
1344 o[INDEX3(i,0,3,numComp,3)] = V0;
1345 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1346 o[INDEX3(i,2,3,numComp,3)] = V2;
1347 } // end of component loop i
1348 } // end of k0 loop
1349 } // end of k2 loop
1350 } // end of face 2
1351 if (m_faceOffset[3] > -1) {
1352 #pragma omp for nowait
1353 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1354 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1355 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1357 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1364 for (index_t i=0; i < numComp; ++i) {
1365 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1366 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1367 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1368 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1369 o[INDEX3(i,0,0,numComp,3)] = V0;
1370 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1371 o[INDEX3(i,2,0,numComp,3)] = V2;
1372 o[INDEX3(i,0,1,numComp,3)] = V0;
1373 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1374 o[INDEX3(i,2,1,numComp,3)] = V3;
1375 o[INDEX3(i,0,2,numComp,3)] = V1;
1376 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1377 o[INDEX3(i,2,2,numComp,3)] = V2;
1378 o[INDEX3(i,0,3,numComp,3)] = V1;
1379 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1380 o[INDEX3(i,2,3,numComp,3)] = V3;
1381 } // end of component loop i
1382 } // end of k0 loop
1383 } // end of k2 loop
1384 } // end of face 3
1385 if (m_faceOffset[4] > -1) {
1386 #pragma omp for nowait
1387 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1388 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1389 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1390 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1398 for (index_t i=0; i < numComp; ++i) {
1399 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1400 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1401 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1402 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1403 o[INDEX3(i,0,0,numComp,3)] = V0;
1404 o[INDEX3(i,1,0,numComp,3)] = V2;
1405 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1406 o[INDEX3(i,0,1,numComp,3)] = V0;
1407 o[INDEX3(i,1,1,numComp,3)] = V3;
1408 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1409 o[INDEX3(i,0,2,numComp,3)] = V1;
1410 o[INDEX3(i,1,2,numComp,3)] = V2;
1411 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1412 o[INDEX3(i,0,3,numComp,3)] = V1;
1413 o[INDEX3(i,1,3,numComp,3)] = V3;
1414 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1415 } // end of component loop i
1416 } // end of k0 loop
1417 } // end of k1 loop
1418 } // end of face 4
1419 if (m_faceOffset[5] > -1) {
1420 #pragma omp for nowait
1421 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1422 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1423 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1424 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1432 for (index_t i=0; i < numComp; ++i) {
1433 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1434 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1435 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1436 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1437 o[INDEX3(i,0,0,numComp,3)] = V0;
1438 o[INDEX3(i,1,0,numComp,3)] = V2;
1439 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1440 o[INDEX3(i,0,1,numComp,3)] = V0;
1441 o[INDEX3(i,1,1,numComp,3)] = V3;
1442 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1443 o[INDEX3(i,0,2,numComp,3)] = V1;
1444 o[INDEX3(i,1,2,numComp,3)] = V2;
1445 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1446 o[INDEX3(i,0,3,numComp,3)] = V1;
1447 o[INDEX3(i,1,3,numComp,3)] = V3;
1448 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1449 } // end of component loop i
1450 } // end of k0 loop
1451 } // end of k1 loop
1452 } // end of face 5
1453 } // end of parallel section
1454 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1455 out.requireWrite();
1456 #pragma omp parallel
1457 {
1458 vector<double> f_000(numComp);
1459 vector<double> f_001(numComp);
1460 vector<double> f_010(numComp);
1461 vector<double> f_011(numComp);
1462 vector<double> f_100(numComp);
1463 vector<double> f_101(numComp);
1464 vector<double> f_110(numComp);
1465 vector<double> f_111(numComp);
1466 if (m_faceOffset[0] > -1) {
1467 #pragma omp for nowait
1468 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1469 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1470 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1479 for (index_t i=0; i < numComp; ++i) {
1480 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1481 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1482 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1483 } // end of component loop i
1484 } // end of k1 loop
1485 } // end of k2 loop
1486 } // end of face 0
1487 if (m_faceOffset[1] > -1) {
1488 #pragma omp for nowait
1489 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1490 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1491 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1492 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1493 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1494 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1495 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1496 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1500 for (index_t i=0; i < numComp; ++i) {
1501 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1502 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1503 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1504 } // end of component loop i
1505 } // end of k1 loop
1506 } // end of k2 loop
1507 } // end of face 1
1508 if (m_faceOffset[2] > -1) {
1509 #pragma omp for nowait
1510 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1511 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1512 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1517 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1520 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1521 for (index_t i=0; i < numComp; ++i) {
1522 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1523 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1524 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1525 } // end of component loop i
1526 } // end of k0 loop
1527 } // end of k2 loop
1528 } // end of face 2
1529 if (m_faceOffset[3] > -1) {
1530 #pragma omp for nowait
1531 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1532 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1533 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1538 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1542 for (index_t i=0; i < numComp; ++i) {
1543 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1544 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1545 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1546 } // end of component loop i
1547 } // end of k0 loop
1548 } // end of k2 loop
1549 } // end of face 3
1550 if (m_faceOffset[4] > -1) {
1551 #pragma omp for nowait
1552 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1553 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1554 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1555 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1556 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1557 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1558 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1559 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1560 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1561 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1562 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1563 for (index_t i=0; i < numComp; ++i) {
1564 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1565 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1566 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1567 } // end of component loop i
1568 } // end of k0 loop
1569 } // end of k1 loop
1570 } // end of face 4
1571 if (m_faceOffset[5] > -1) {
1572 #pragma omp for nowait
1573 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1574 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1575 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1583 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1584 for (index_t i=0; i < numComp; ++i) {
1585 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1586 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1587 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1588 } // end of component loop i
1589 } // end of k0 loop
1590 } // end of k1 loop
1591 } // end of face 5
1592 } // end of parallel section
1593 }
1594 }
1595
1596 //protected
1597 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1598 {
1599 const dim_t numComp = arg.getDataPointSize();
1600 const index_t left = (m_offset[0]==0 ? 0 : 1);
1601 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1602 const index_t front = (m_offset[2]==0 ? 0 : 1);
1603 const int fs = arg.getFunctionSpace().getTypeCode();
1604 if (fs == Elements && arg.actsExpanded()) {
1605 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1606 #pragma omp parallel
1607 {
1608 vector<double> int_local(numComp, 0);
1609 #pragma omp for nowait
1610 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1611 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1612 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1613 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1614 for (index_t i=0; i < numComp; ++i) {
1615 const double f_0 = f[INDEX2(i,0,numComp)];
1616 const double f_1 = f[INDEX2(i,1,numComp)];
1617 const double f_2 = f[INDEX2(i,2,numComp)];
1618 const double f_3 = f[INDEX2(i,3,numComp)];
1619 const double f_4 = f[INDEX2(i,4,numComp)];
1620 const double f_5 = f[INDEX2(i,5,numComp)];
1621 const double f_6 = f[INDEX2(i,6,numComp)];
1622 const double f_7 = f[INDEX2(i,7,numComp)];
1623 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1624 } // end of component loop i
1625 } // end of k0 loop
1626 } // end of k1 loop
1627 } // end of k2 loop
1628
1629 #pragma omp critical
1630 for (index_t i=0; i<numComp; i++)
1631 integrals[i]+=int_local[i];
1632 } // end of parallel section
1633
1634 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1635 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1636 #pragma omp parallel
1637 {
1638 vector<double> int_local(numComp, 0);
1639 #pragma omp for nowait
1640 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1641 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1642 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1643 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1644 for (index_t i=0; i < numComp; ++i) {
1645 int_local[i]+=f[i]*w_0;
1646 } // end of component loop i
1647 } // end of k0 loop
1648 } // end of k1 loop
1649 } // end of k2 loop
1650
1651 #pragma omp critical
1652 for (index_t i=0; i<numComp; i++)
1653 integrals[i]+=int_local[i];
1654 } // end of parallel section
1655
1656 } else if (fs == FaceElements && arg.actsExpanded()) {
1657 const double w_0 = m_dx[1]*m_dx[2]/4.;
1658 const double w_1 = m_dx[0]*m_dx[2]/4.;
1659 const double w_2 = m_dx[0]*m_dx[1]/4.;
1660 #pragma omp parallel
1661 {
1662 vector<double> int_local(numComp, 0);
1663 if (m_faceOffset[0] > -1) {
1664 #pragma omp for nowait
1665 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1666 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1667 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1668 for (index_t i=0; i < numComp; ++i) {
1669 const double f_0 = f[INDEX2(i,0,numComp)];
1670 const double f_1 = f[INDEX2(i,1,numComp)];
1671 const double f_2 = f[INDEX2(i,2,numComp)];
1672 const double f_3 = f[INDEX2(i,3,numComp)];
1673 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1674 } // end of component loop i
1675 } // end of k1 loop
1676 } // end of k2 loop
1677 }
1678
1679 if (m_faceOffset[1] > -1) {
1680 #pragma omp for nowait
1681 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1682 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1683 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1684 for (index_t i=0; i < numComp; ++i) {
1685 const double f_0 = f[INDEX2(i,0,numComp)];
1686 const double f_1 = f[INDEX2(i,1,numComp)];
1687 const double f_2 = f[INDEX2(i,2,numComp)];
1688 const double f_3 = f[INDEX2(i,3,numComp)];
1689 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1690 } // end of component loop i
1691 } // end of k1 loop
1692 } // end of k2 loop
1693 }
1694
1695 if (m_faceOffset[2] > -1) {
1696 #pragma omp for nowait
1697 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1698 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1699 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1700 for (index_t i=0; i < numComp; ++i) {
1701 const double f_0 = f[INDEX2(i,0,numComp)];
1702 const double f_1 = f[INDEX2(i,1,numComp)];
1703 const double f_2 = f[INDEX2(i,2,numComp)];
1704 const double f_3 = f[INDEX2(i,3,numComp)];
1705 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1706 } // end of component loop i
1707 } // end of k1 loop
1708 } // end of k2 loop
1709 }
1710
1711 if (m_faceOffset[3] > -1) {
1712 #pragma omp for nowait
1713 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1714 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1715 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1716 for (index_t i=0; i < numComp; ++i) {
1717 const double f_0 = f[INDEX2(i,0,numComp)];
1718 const double f_1 = f[INDEX2(i,1,numComp)];
1719 const double f_2 = f[INDEX2(i,2,numComp)];
1720 const double f_3 = f[INDEX2(i,3,numComp)];
1721 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1722 } // end of component loop i
1723 } // end of k1 loop
1724 } // end of k2 loop
1725 }
1726
1727 if (m_faceOffset[4] > -1) {
1728 #pragma omp for nowait
1729 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1730 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1731 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1732 for (index_t i=0; i < numComp; ++i) {
1733 const double f_0 = f[INDEX2(i,0,numComp)];
1734 const double f_1 = f[INDEX2(i,1,numComp)];
1735 const double f_2 = f[INDEX2(i,2,numComp)];
1736 const double f_3 = f[INDEX2(i,3,numComp)];
1737 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1738 } // end of component loop i
1739 } // end of k1 loop
1740 } // end of k2 loop
1741 }
1742
1743 if (m_faceOffset[5] > -1) {
1744 #pragma omp for nowait
1745 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1746 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1747 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1748 for (index_t i=0; i < numComp; ++i) {
1749 const double f_0 = f[INDEX2(i,0,numComp)];
1750 const double f_1 = f[INDEX2(i,1,numComp)];
1751 const double f_2 = f[INDEX2(i,2,numComp)];
1752 const double f_3 = f[INDEX2(i,3,numComp)];
1753 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1754 } // end of component loop i
1755 } // end of k1 loop
1756 } // end of k2 loop
1757 }
1758
1759 #pragma omp critical
1760 for (index_t i=0; i<numComp; i++)
1761 integrals[i]+=int_local[i];
1762 } // end of parallel section
1763
1764 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1765 const double w_0 = m_dx[1]*m_dx[2];
1766 const double w_1 = m_dx[0]*m_dx[2];
1767 const double w_2 = m_dx[0]*m_dx[1];
1768 #pragma omp parallel
1769 {
1770 vector<double> int_local(numComp, 0);
1771 if (m_faceOffset[0] > -1) {
1772 #pragma omp for nowait
1773 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1774 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1775 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1776 for (index_t i=0; i < numComp; ++i) {
1777 int_local[i]+=f[i]*w_0;
1778 } // end of component loop i
1779 } // end of k1 loop
1780 } // end of k2 loop
1781 }
1782
1783 if (m_faceOffset[1] > -1) {
1784 #pragma omp for nowait
1785 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1786 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1787 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1788 for (index_t i=0; i < numComp; ++i) {
1789 int_local[i]+=f[i]*w_0;
1790 } // end of component loop i
1791 } // end of k1 loop
1792 } // end of k2 loop
1793 }
1794
1795 if (m_faceOffset[2] > -1) {
1796 #pragma omp for nowait
1797 for (index_t k2 = front; k2 < front+m_ownNE