/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4751 - (show annotations)
Fri Mar 14 06:49:06 2014 UTC (5 years ago) by sshaw
Original Path: trunk/ripley/src/Brick.cpp
File size: 165562 byte(s)
completed LameAssemblers
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <ripley/LameAssembler3D.h>
23 #include <ripley/domainhelpers.h>
24 #include <boost/scoped_array.hpp>
25
26 #ifdef USE_NETCDF
27 #include <netcdfcpp.h>
28 #endif
29
30 #if USE_SILO
31 #include <silo.h>
32 #ifdef ESYS_MPI
33 #include <pmpio.h>
34 #endif
35 #endif
36
37 #include <iomanip>
38
39 #include "esysUtils/EsysRandom.h"
40 #include "blocktools.h"
41
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 int indexOfMax(int a, int b, int c) {
49 if (a > b) {
50 if (c > a) {
51 return 2;
52 }
53 return 0;
54 } else if (b > c) {
55 return 1;
56 }
57 return 2;
58 }
59
60 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 double x1, double y1, double z1, int d0, int d1, int d2,
62 const std::vector<double>& points, const std::vector<int>& tags,
63 const simap_t& tagnamestonums) :
64 RipleyDomain(3)
65 {
66 // ignore subdivision parameters for serial run
67 if (m_mpiInfo->size == 1) {
68 d0=1;
69 d1=1;
70 d2=1;
71 }
72 bool warn=false;
73
74 std::vector<int> factors;
75 int ranks = m_mpiInfo->size;
76 int epr[3] = {n0,n1,n2};
77 int d[3] = {d0,d1,d2};
78 if (d0<=0 || d1<=0 || d2<=0) {
79 for (int i = 0; i < 3; i++) {
80 if (d[i] < 1) {
81 d[i] = 1;
82 continue;
83 }
84 epr[i] = -1; // can no longer be max
85 if (ranks % d[i] != 0) {
86 throw RipleyException("Invalid number of spatial subdivisions");
87 }
88 //remove
89 ranks /= d[i];
90 }
91 factorise(factors, ranks);
92 if (factors.size() != 0) {
93 warn = true;
94 }
95 }
96 while (factors.size() > 0) {
97 int i = indexOfMax(epr[0],epr[1],epr[2]);
98 int f = factors.back();
99 factors.pop_back();
100 d[i] *= f;
101 epr[i] /= f;
102 }
103 d0 = d[0]; d1 = d[1]; d2 = d[2];
104
105 // ensure number of subdivisions is valid and nodes can be distributed
106 // among number of ranks
107 if (d0*d1*d2 != m_mpiInfo->size){
108 throw RipleyException("Invalid number of spatial subdivisions");
109 }
110 if (warn) {
111 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113 }
114
115 double l0 = x1-x0;
116 double l1 = y1-y0;
117 double l2 = z1-z0;
118 m_dx[0] = l0/n0;
119 m_dx[1] = l1/n1;
120 m_dx[2] = l2/n2;
121
122 if ((n0+1)%d0 > 0) {
123 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 l0=m_dx[0]*n0;
125 cout << "Warning: Adjusted number of elements and length. N0="
126 << n0 << ", l0=" << l0 << endl;
127 }
128 if ((n1+1)%d1 > 0) {
129 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 l1=m_dx[1]*n1;
131 cout << "Warning: Adjusted number of elements and length. N1="
132 << n1 << ", l1=" << l1 << endl;
133 }
134 if ((n2+1)%d2 > 0) {
135 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 l2=m_dx[2]*n2;
137 cout << "Warning: Adjusted number of elements and length. N2="
138 << n2 << ", l2=" << l2 << endl;
139 }
140
141 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 throw RipleyException("Too few elements for the number of ranks");
143
144 m_gNE[0] = n0;
145 m_gNE[1] = n1;
146 m_gNE[2] = n2;
147 m_origin[0] = x0;
148 m_origin[1] = y0;
149 m_origin[2] = z0;
150 m_length[0] = l0;
151 m_length[1] = l1;
152 m_length[2] = l2;
153 m_NX[0] = d0;
154 m_NX[1] = d1;
155 m_NX[2] = d2;
156
157 // local number of elements (including overlap)
158 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160 m_NE[0]++;
161 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162 m_ownNE[0]--;
163
164 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166 m_NE[1]++;
167 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168 m_ownNE[1]--;
169
170 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172 m_NE[2]++;
173 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174 m_ownNE[2]--;
175
176 // local number of nodes
177 m_NN[0] = m_NE[0]+1;
178 m_NN[1] = m_NE[1]+1;
179 m_NN[2] = m_NE[2]+1;
180
181 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183 if (m_offset[0] > 0)
184 m_offset[0]--;
185 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186 if (m_offset[1] > 0)
187 m_offset[1]--;
188 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189 if (m_offset[2] > 0)
190 m_offset[2]--;
191
192 populateSampleIds();
193 createPattern();
194
195 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197 i != tagnamestonums.end(); i++) {
198 setTagMap(i->first, i->second);
199 }
200 addPoints(tags.size(), &points[0], &tags[0]);
201 }
202
203
204 Brick::~Brick()
205 {
206 Paso_SystemMatrixPattern_free(m_pattern);
207 Paso_Connector_free(m_connector);
208 delete assembler;
209 }
210
211 string Brick::getDescription() const
212 {
213 return "ripley::Brick";
214 }
215
216 bool Brick::operator==(const AbstractDomain& other) const
217 {
218 const Brick* o=dynamic_cast<const Brick*>(&other);
219 if (o) {
220 return (RipleyDomain::operator==(other) &&
221 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
222 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
223 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
224 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
225 }
226
227 return false;
228 }
229
230 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
231 const ReaderParameters& params) const
232 {
233 #ifdef USE_NETCDF
234 // check destination function space
235 int myN0, myN1, myN2;
236 if (out.getFunctionSpace().getTypeCode() == Nodes) {
237 myN0 = m_NN[0];
238 myN1 = m_NN[1];
239 myN2 = m_NN[2];
240 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
241 out.getFunctionSpace().getTypeCode() == ReducedElements) {
242 myN0 = m_NE[0];
243 myN1 = m_NE[1];
244 myN2 = m_NE[2];
245 } else
246 throw RipleyException("readNcGrid(): invalid function space for output data object");
247
248 if (params.first.size() != 3)
249 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
250
251 if (params.numValues.size() != 3)
252 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
253
254 if (params.multiplier.size() != 3)
255 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
256 for (size_t i=0; i<params.multiplier.size(); i++)
257 if (params.multiplier[i]<1)
258 throw RipleyException("readNcGrid(): all multipliers must be positive");
259
260 // check file existence and size
261 NcFile f(filename.c_str(), NcFile::ReadOnly);
262 if (!f.is_valid())
263 throw RipleyException("readNcGrid(): cannot open file");
264
265 NcVar* var = f.get_var(varname.c_str());
266 if (!var)
267 throw RipleyException("readNcGrid(): invalid variable name");
268
269 // TODO: rank>0 data support
270 const int numComp = out.getDataPointSize();
271 if (numComp > 1)
272 throw RipleyException("readNcGrid(): only scalar data supported");
273
274 const int dims = var->num_dims();
275 boost::scoped_array<long> edges(var->edges());
276
277 // is this a slice of the data object (dims!=3)?
278 // note the expected ordering of edges (as in numpy: z,y,x)
279 if ( (dims==3 && (params.numValues[2] > edges[0] ||
280 params.numValues[1] > edges[1] ||
281 params.numValues[0] > edges[2]))
282 || (dims==2 && params.numValues[2]>1)
283 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
284 throw RipleyException("readNcGrid(): not enough data in file");
285 }
286
287 // check if this rank contributes anything
288 if (params.first[0] >= m_offset[0]+myN0 ||
289 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
290 params.first[1] >= m_offset[1]+myN1 ||
291 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
292 params.first[2] >= m_offset[2]+myN2 ||
293 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
294 return;
295 }
296
297 // now determine how much this rank has to write
298
299 // first coordinates in data object to write to
300 const int first0 = max(0, params.first[0]-m_offset[0]);
301 const int first1 = max(0, params.first[1]-m_offset[1]);
302 const int first2 = max(0, params.first[2]-m_offset[2]);
303 // indices to first value in file (not accounting for reverse yet)
304 int idx0 = max(0, m_offset[0]-params.first[0]);
305 int idx1 = max(0, m_offset[1]-params.first[1]);
306 int idx2 = max(0, m_offset[2]-params.first[2]);
307 // number of values to read
308 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
309 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
310 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
311
312 // make sure we read the right block if going backwards through file
313 if (params.reverse[0])
314 idx0 = edges[dims-1]-num0-idx0;
315 if (dims>1 && params.reverse[1])
316 idx1 = edges[dims-2]-num1-idx1;
317 if (dims>2 && params.reverse[2])
318 idx2 = edges[dims-3]-num2-idx2;
319
320
321 vector<double> values(num0*num1*num2);
322 if (dims==3) {
323 var->set_cur(idx2, idx1, idx0);
324 var->get(&values[0], num2, num1, num0);
325 } else if (dims==2) {
326 var->set_cur(idx1, idx0);
327 var->get(&values[0], num1, num0);
328 } else {
329 var->set_cur(idx0);
330 var->get(&values[0], num0);
331 }
332
333 const int dpp = out.getNumDataPointsPerSample();
334 out.requireWrite();
335
336 // helpers for reversing
337 const int x0 = (params.reverse[0] ? num0-1 : 0);
338 const int x_mult = (params.reverse[0] ? -1 : 1);
339 const int y0 = (params.reverse[1] ? num1-1 : 0);
340 const int y_mult = (params.reverse[1] ? -1 : 1);
341 const int z0 = (params.reverse[2] ? num2-1 : 0);
342 const int z_mult = (params.reverse[2] ? -1 : 1);
343
344 for (index_t z=0; z<num2; z++) {
345 for (index_t y=0; y<num1; y++) {
346 #pragma omp parallel for
347 for (index_t x=0; x<num0; x++) {
348 const int baseIndex = first0+x*params.multiplier[0]
349 +(first1+y*params.multiplier[1])*myN0
350 +(first2+z*params.multiplier[2])*myN0*myN1;
351 const int srcIndex=(z0+z_mult*z)*num1*num0
352 +(y0+y_mult*y)*num0
353 +(x0+x_mult*x);
354 if (!isnan(values[srcIndex])) {
355 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
356 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
357 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
358 const int dataIndex = baseIndex+m0
359 +m1*myN0
360 +m2*myN0*myN1;
361 double* dest = out.getSampleDataRW(dataIndex);
362 for (index_t q=0; q<dpp; q++) {
363 *dest++ = values[srcIndex];
364 }
365 }
366 }
367 }
368 }
369 }
370 }
371 }
372 #else
373 throw RipleyException("readNcGrid(): not compiled with netCDF support");
374 #endif
375 }
376
377 #ifdef USE_BOOSTIO
378 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
379 const ReaderParameters& params) const
380 {
381 // the mapping is not universally correct but should work on our
382 // supported platforms
383 switch (params.dataType) {
384 case DATATYPE_INT32:
385 readBinaryGridZippedImpl<int>(out, filename, params);
386 break;
387 case DATATYPE_FLOAT32:
388 readBinaryGridZippedImpl<float>(out, filename, params);
389 break;
390 case DATATYPE_FLOAT64:
391 readBinaryGridZippedImpl<double>(out, filename, params);
392 break;
393 default:
394 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
395 }
396 }
397 #endif
398
399 void Brick::readBinaryGrid(escript::Data& out, string filename,
400 const ReaderParameters& params) const
401 {
402 // the mapping is not universally correct but should work on our
403 // supported platforms
404 switch (params.dataType) {
405 case DATATYPE_INT32:
406 readBinaryGridImpl<int>(out, filename, params);
407 break;
408 case DATATYPE_FLOAT32:
409 readBinaryGridImpl<float>(out, filename, params);
410 break;
411 case DATATYPE_FLOAT64:
412 readBinaryGridImpl<double>(out, filename, params);
413 break;
414 default:
415 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
416 }
417 }
418
419 template<typename ValueType>
420 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
421 const ReaderParameters& params) const
422 {
423 // check destination function space
424 int myN0, myN1, myN2;
425 if (out.getFunctionSpace().getTypeCode() == Nodes) {
426 myN0 = m_NN[0];
427 myN1 = m_NN[1];
428 myN2 = m_NN[2];
429 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
430 out.getFunctionSpace().getTypeCode() == ReducedElements) {
431 myN0 = m_NE[0];
432 myN1 = m_NE[1];
433 myN2 = m_NE[2];
434 } else
435 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
436
437 if (params.first.size() != 3)
438 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
439
440 if (params.numValues.size() != 3)
441 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
442
443 if (params.multiplier.size() != 3)
444 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
445 for (size_t i=0; i<params.multiplier.size(); i++)
446 if (params.multiplier[i]<1)
447 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
448
449 // check file existence and size
450 ifstream f(filename.c_str(), ifstream::binary);
451 if (f.fail()) {
452 throw RipleyException("readBinaryGrid(): cannot open file");
453 }
454 f.seekg(0, ios::end);
455 const int numComp = out.getDataPointSize();
456 const int filesize = f.tellg();
457 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
458 if (filesize < reqsize) {
459 f.close();
460 throw RipleyException("readBinaryGrid(): not enough data in file");
461 }
462
463 // check if this rank contributes anything
464 if (params.first[0] >= m_offset[0]+myN0 ||
465 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
466 params.first[1] >= m_offset[1]+myN1 ||
467 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
468 params.first[2] >= m_offset[2]+myN2 ||
469 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
470 f.close();
471 return;
472 }
473
474 // now determine how much this rank has to write
475
476 // first coordinates in data object to write to
477 const int first0 = max(0, params.first[0]-m_offset[0]);
478 const int first1 = max(0, params.first[1]-m_offset[1]);
479 const int first2 = max(0, params.first[2]-m_offset[2]);
480 // indices to first value in file
481 const int idx0 = max(0, m_offset[0]-params.first[0]);
482 const int idx1 = max(0, m_offset[1]-params.first[1]);
483 const int idx2 = max(0, m_offset[2]-params.first[2]);
484 // number of values to read
485 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
486 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
487 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
488
489 out.requireWrite();
490 vector<ValueType> values(num0*numComp);
491 const int dpp = out.getNumDataPointsPerSample();
492
493 for (int z=0; z<num2; z++) {
494 for (int y=0; y<num1; y++) {
495 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
496 +(idx2+z)*params.numValues[0]*params.numValues[1]);
497 f.seekg(fileofs*sizeof(ValueType));
498 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
499
500 for (int x=0; x<num0; x++) {
501 const int baseIndex = first0+x*params.multiplier[0]
502 +(first1+y*params.multiplier[1])*myN0
503 +(first2+z*params.multiplier[2])*myN0*myN1;
504 for (int m2=0; m2<params.multiplier[2]; m2++) {
505 for (int m1=0; m1<params.multiplier[1]; m1++) {
506 for (int m0=0; m0<params.multiplier[0]; m0++) {
507 const int dataIndex = baseIndex+m0
508 +m1*myN0
509 +m2*myN0*myN1;
510 double* dest = out.getSampleDataRW(dataIndex);
511 for (int c=0; c<numComp; c++) {
512 ValueType val = values[x*numComp+c];
513
514 if (params.byteOrder != BYTEORDER_NATIVE) {
515 char* cval = reinterpret_cast<char*>(&val);
516 // this will alter val!!
517 byte_swap32(cval);
518 }
519 if (!std::isnan(val)) {
520 for (int q=0; q<dpp; q++) {
521 *dest++ = static_cast<double>(val);
522 }
523 }
524 }
525 }
526 }
527 }
528 }
529 }
530 }
531
532 f.close();
533 }
534
535 #ifdef USE_BOOSTIO
536 template<typename ValueType>
537 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
538 const ReaderParameters& params) const
539 {
540 // check destination function space
541 int myN0, myN1, myN2;
542 if (out.getFunctionSpace().getTypeCode() == Nodes) {
543 myN0 = m_NN[0];
544 myN1 = m_NN[1];
545 myN2 = m_NN[2];
546 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
547 out.getFunctionSpace().getTypeCode() == ReducedElements) {
548 myN0 = m_NE[0];
549 myN1 = m_NE[1];
550 myN2 = m_NE[2];
551 } else
552 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
553
554 if (params.first.size() != 3)
555 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
556
557 if (params.numValues.size() != 3)
558 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
559
560 if (params.multiplier.size() != 3)
561 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
562 for (size_t i=0; i<params.multiplier.size(); i++)
563 if (params.multiplier[i]<1)
564 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
565
566 // check file existence and size
567 ifstream f(filename.c_str(), ifstream::binary);
568 if (f.fail()) {
569 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
570 }
571 f.seekg(0, ios::end);
572 const int numComp = out.getDataPointSize();
573 int filesize = f.tellg();
574 f.seekg(0, ios::beg);
575 std::vector<char> compressed(filesize);
576 f.read((char*)&compressed[0], filesize);
577 f.close();
578 std::vector<char> decompressed = unzip(compressed);
579 filesize = decompressed.size();
580 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
581 if (filesize < reqsize) {
582 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
583 }
584
585 // check if this rank contributes anything
586 if (params.first[0] >= m_offset[0]+myN0 ||
587 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
588 params.first[1] >= m_offset[1]+myN1 ||
589 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
590 params.first[2] >= m_offset[2]+myN2 ||
591 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
592 return;
593 }
594
595 // now determine how much this rank has to write
596
597 // first coordinates in data object to write to
598 const int first0 = max(0, params.first[0]-m_offset[0]);
599 const int first1 = max(0, params.first[1]-m_offset[1]);
600 const int first2 = max(0, params.first[2]-m_offset[2]);
601 // indices to first value in file
602 const int idx0 = max(0, m_offset[0]-params.first[0]);
603 const int idx1 = max(0, m_offset[1]-params.first[1]);
604 const int idx2 = max(0, m_offset[2]-params.first[2]);
605 // number of values to read
606 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
607 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
608 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
609
610 out.requireWrite();
611 vector<ValueType> values(num0*numComp);
612 const int dpp = out.getNumDataPointsPerSample();
613
614 for (int z=0; z<num2; z++) {
615 for (int y=0; y<num1; y++) {
616 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
617 +(idx2+z)*params.numValues[0]*params.numValues[1]);
618 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
619
620 for (int x=0; x<num0; x++) {
621 const int baseIndex = first0+x*params.multiplier[0]
622 +(first1+y*params.multiplier[1])*myN0
623 +(first2+z*params.multiplier[2])*myN0*myN1;
624 for (int m2=0; m2<params.multiplier[2]; m2++) {
625 for (int m1=0; m1<params.multiplier[1]; m1++) {
626 for (int m0=0; m0<params.multiplier[0]; m0++) {
627 const int dataIndex = baseIndex+m0
628 +m1*myN0
629 +m2*myN0*myN1;
630 double* dest = out.getSampleDataRW(dataIndex);
631 for (int c=0; c<numComp; c++) {
632 ValueType val = values[x*numComp+c];
633
634 if (params.byteOrder != BYTEORDER_NATIVE) {
635 char* cval = reinterpret_cast<char*>(&val);
636 // this will alter val!!
637 byte_swap32(cval);
638 }
639 if (!std::isnan(val)) {
640 for (int q=0; q<dpp; q++) {
641 *dest++ = static_cast<double>(val);
642 }
643 }
644 }
645 }
646 }
647 }
648 }
649 }
650 }
651 }
652 #endif
653
654 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
655 int byteOrder, int dataType) const
656 {
657 // the mapping is not universally correct but should work on our
658 // supported platforms
659 switch (dataType) {
660 case DATATYPE_INT32:
661 writeBinaryGridImpl<int>(in, filename, byteOrder);
662 break;
663 case DATATYPE_FLOAT32:
664 writeBinaryGridImpl<float>(in, filename, byteOrder);
665 break;
666 case DATATYPE_FLOAT64:
667 writeBinaryGridImpl<double>(in, filename, byteOrder);
668 break;
669 default:
670 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
671 }
672 }
673
674 template<typename ValueType>
675 void Brick::writeBinaryGridImpl(const escript::Data& in,
676 const string& filename, int byteOrder) const
677 {
678 // check function space and determine number of points
679 int myN0, myN1, myN2;
680 int totalN0, totalN1, totalN2;
681 if (in.getFunctionSpace().getTypeCode() == Nodes) {
682 myN0 = m_NN[0];
683 myN1 = m_NN[1];
684 myN2 = m_NN[2];
685 totalN0 = m_gNE[0]+1;
686 totalN1 = m_gNE[1]+1;
687 totalN2 = m_gNE[2]+1;
688 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
689 in.getFunctionSpace().getTypeCode() == ReducedElements) {
690 myN0 = m_NE[0];
691 myN1 = m_NE[1];
692 myN2 = m_NE[2];
693 totalN0 = m_gNE[0];
694 totalN1 = m_gNE[1];
695 totalN2 = m_gNE[2];
696 } else
697 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
698
699 const int numComp = in.getDataPointSize();
700 const int dpp = in.getNumDataPointsPerSample();
701 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
702
703 if (numComp > 1 || dpp > 1)
704 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
705
706 // from here on we know that each sample consists of one value
707 FileWriter fw;
708 fw.openFile(filename, fileSize);
709 MPIBarrier();
710
711 for (index_t z=0; z<myN2; z++) {
712 for (index_t y=0; y<myN1; y++) {
713 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
714 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
715 ostringstream oss;
716
717 for (index_t x=0; x<myN0; x++) {
718 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
719 ValueType fvalue = static_cast<ValueType>(*sample);
720 if (byteOrder == BYTEORDER_NATIVE) {
721 oss.write((char*)&fvalue, sizeof(fvalue));
722 } else {
723 char* value = reinterpret_cast<char*>(&fvalue);
724 oss.write(byte_swap32(value), sizeof(fvalue));
725 }
726 }
727 fw.writeAt(oss, fileofs);
728 }
729 }
730 fw.close();
731 }
732
733 void Brick::dump(const string& fileName) const
734 {
735 #if USE_SILO
736 string fn(fileName);
737 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
738 fn+=".silo";
739 }
740
741 int driver=DB_HDF5;
742 string siloPath;
743 DBfile* dbfile = NULL;
744
745 #ifdef ESYS_MPI
746 PMPIO_baton_t* baton = NULL;
747 const int NUM_SILO_FILES = 1;
748 const char* blockDirFmt = "/block%04d";
749 #endif
750
751 if (m_mpiInfo->size > 1) {
752 #ifdef ESYS_MPI
753 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
754 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
755 PMPIO_DefaultClose, (void*)&driver);
756 // try the fallback driver in case of error
757 if (!baton && driver != DB_PDB) {
758 driver = DB_PDB;
759 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
760 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
761 PMPIO_DefaultClose, (void*)&driver);
762 }
763 if (baton) {
764 char str[64];
765 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
766 siloPath = str;
767 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
768 }
769 #endif
770 } else {
771 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
772 getDescription().c_str(), driver);
773 // try the fallback driver in case of error
774 if (!dbfile && driver != DB_PDB) {
775 driver = DB_PDB;
776 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
777 getDescription().c_str(), driver);
778 }
779 }
780
781 if (!dbfile)
782 throw RipleyException("dump: Could not create Silo file");
783
784 /*
785 if (driver==DB_HDF5) {
786 // gzip level 1 already provides good compression with minimal
787 // performance penalty. Some tests showed that gzip levels >3 performed
788 // rather badly on escript data both in terms of time and space
789 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
790 }
791 */
792
793 boost::scoped_ptr<double> x(new double[m_NN[0]]);
794 boost::scoped_ptr<double> y(new double[m_NN[1]]);
795 boost::scoped_ptr<double> z(new double[m_NN[2]]);
796 double* coords[3] = { x.get(), y.get(), z.get() };
797 #pragma omp parallel
798 {
799 #pragma omp for
800 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
801 coords[0][i0]=getLocalCoordinate(i0, 0);
802 }
803 #pragma omp for
804 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
805 coords[1][i1]=getLocalCoordinate(i1, 1);
806 }
807 #pragma omp for
808 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
809 coords[2][i2]=getLocalCoordinate(i2, 2);
810 }
811 }
812 int* dims = const_cast<int*>(getNumNodesPerDim());
813
814 // write mesh
815 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
816 DB_COLLINEAR, NULL);
817
818 // write node ids
819 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
820 NULL, 0, DB_INT, DB_NODECENT, NULL);
821
822 // write element ids
823 dims = const_cast<int*>(getNumElementsPerDim());
824 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
825 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
826
827 // rank 0 writes multimesh and multivar
828 if (m_mpiInfo->rank == 0) {
829 vector<string> tempstrings;
830 vector<char*> names;
831 for (dim_t i=0; i<m_mpiInfo->size; i++) {
832 stringstream path;
833 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
834 tempstrings.push_back(path.str());
835 names.push_back((char*)tempstrings.back().c_str());
836 }
837 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
838 DBSetDir(dbfile, "/");
839 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
840 &types[0], NULL);
841 tempstrings.clear();
842 names.clear();
843 for (dim_t i=0; i<m_mpiInfo->size; i++) {
844 stringstream path;
845 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
846 tempstrings.push_back(path.str());
847 names.push_back((char*)tempstrings.back().c_str());
848 }
849 types.assign(m_mpiInfo->size, DB_QUADVAR);
850 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
851 &types[0], NULL);
852 tempstrings.clear();
853 names.clear();
854 for (dim_t i=0; i<m_mpiInfo->size; i++) {
855 stringstream path;
856 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
857 tempstrings.push_back(path.str());
858 names.push_back((char*)tempstrings.back().c_str());
859 }
860 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
861 &types[0], NULL);
862 }
863
864 if (m_mpiInfo->size > 1) {
865 #ifdef ESYS_MPI
866 PMPIO_HandOffBaton(baton, dbfile);
867 PMPIO_Finish(baton);
868 #endif
869 } else {
870 DBClose(dbfile);
871 }
872
873 #else // USE_SILO
874 throw RipleyException("dump: no Silo support");
875 #endif
876 }
877
878 const int* Brick::borrowSampleReferenceIDs(int fsType) const
879 {
880 switch (fsType) {
881 case Nodes:
882 case ReducedNodes: //FIXME: reduced
883 return &m_nodeId[0];
884 case DegreesOfFreedom:
885 case ReducedDegreesOfFreedom: //FIXME: reduced
886 return &m_dofId[0];
887 case Elements:
888 case ReducedElements:
889 return &m_elementId[0];
890 case FaceElements:
891 case ReducedFaceElements:
892 return &m_faceId[0];
893 case Points:
894 return &m_diracPointNodeIDs[0];
895 default:
896 break;
897 }
898
899 stringstream msg;
900 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
901 throw RipleyException(msg.str());
902 }
903
904 bool Brick::ownSample(int fsType, index_t id) const
905 {
906 if (getMPISize()==1)
907 return true;
908
909 switch (fsType) {
910 case Nodes:
911 case ReducedNodes: //FIXME: reduced
912 return (m_dofMap[id] < getNumDOF());
913 case DegreesOfFreedom:
914 case ReducedDegreesOfFreedom:
915 return true;
916 case Elements:
917 case ReducedElements:
918 {
919 // check ownership of element's _last_ node
920 const index_t x=id%m_NE[0] + 1;
921 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
922 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
923 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
924 }
925 case FaceElements:
926 case ReducedFaceElements:
927 {
928 // check ownership of face element's last node
929 dim_t n=0;
930 for (size_t i=0; i<6; i++) {
931 n+=m_faceCount[i];
932 if (id<n) {
933 const index_t j=id-n+m_faceCount[i];
934 if (i>=4) { // front or back
935 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
936 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
937 } else if (i>=2) { // bottom or top
938 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
939 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
940 } else { // left or right
941 const index_t first=(i==0 ? 0 : m_NN[0]-1);
942 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
943 }
944 }
945 }
946 return false;
947 }
948 default:
949 break;
950 }
951
952 stringstream msg;
953 msg << "ownSample: invalid function space type " << fsType;
954 throw RipleyException(msg.str());
955 }
956
957 void Brick::setToNormal(escript::Data& out) const
958 {
959 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
960 out.requireWrite();
961 #pragma omp parallel
962 {
963 if (m_faceOffset[0] > -1) {
964 #pragma omp for nowait
965 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
966 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
967 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
968 // set vector at four quadrature points
969 *o++ = -1.; *o++ = 0.; *o++ = 0.;
970 *o++ = -1.; *o++ = 0.; *o++ = 0.;
971 *o++ = -1.; *o++ = 0.; *o++ = 0.;
972 *o++ = -1.; *o++ = 0.; *o = 0.;
973 }
974 }
975 }
976
977 if (m_faceOffset[1] > -1) {
978 #pragma omp for nowait
979 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
980 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
981 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
982 // set vector at four quadrature points
983 *o++ = 1.; *o++ = 0.; *o++ = 0.;
984 *o++ = 1.; *o++ = 0.; *o++ = 0.;
985 *o++ = 1.; *o++ = 0.; *o++ = 0.;
986 *o++ = 1.; *o++ = 0.; *o = 0.;
987 }
988 }
989 }
990
991 if (m_faceOffset[2] > -1) {
992 #pragma omp for nowait
993 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
994 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
995 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
996 // set vector at four quadrature points
997 *o++ = 0.; *o++ = -1.; *o++ = 0.;
998 *o++ = 0.; *o++ = -1.; *o++ = 0.;
999 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1000 *o++ = 0.; *o++ = -1.; *o = 0.;
1001 }
1002 }
1003 }
1004
1005 if (m_faceOffset[3] > -1) {
1006 #pragma omp for nowait
1007 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1008 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1009 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1010 // set vector at four quadrature points
1011 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1012 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1013 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1014 *o++ = 0.; *o++ = 1.; *o = 0.;
1015 }
1016 }
1017 }
1018
1019 if (m_faceOffset[4] > -1) {
1020 #pragma omp for nowait
1021 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1022 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1023 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1024 // set vector at four quadrature points
1025 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1026 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1027 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1028 *o++ = 0.; *o++ = 0.; *o = -1.;
1029 }
1030 }
1031 }
1032
1033 if (m_faceOffset[5] > -1) {
1034 #pragma omp for nowait
1035 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1036 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1037 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1038 // set vector at four quadrature points
1039 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1040 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1041 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1042 *o++ = 0.; *o++ = 0.; *o = 1.;
1043 }
1044 }
1045 }
1046 } // end of parallel section
1047 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1048 out.requireWrite();
1049 #pragma omp parallel
1050 {
1051 if (m_faceOffset[0] > -1) {
1052 #pragma omp for nowait
1053 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1054 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1055 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1056 *o++ = -1.;
1057 *o++ = 0.;
1058 *o = 0.;
1059 }
1060 }
1061 }
1062
1063 if (m_faceOffset[1] > -1) {
1064 #pragma omp for nowait
1065 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1066 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1067 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1068 *o++ = 1.;
1069 *o++ = 0.;
1070 *o = 0.;
1071 }
1072 }
1073 }
1074
1075 if (m_faceOffset[2] > -1) {
1076 #pragma omp for nowait
1077 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1078 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1079 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1080 *o++ = 0.;
1081 *o++ = -1.;
1082 *o = 0.;
1083 }
1084 }
1085 }
1086
1087 if (m_faceOffset[3] > -1) {
1088 #pragma omp for nowait
1089 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1090 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1091 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1092 *o++ = 0.;
1093 *o++ = 1.;
1094 *o = 0.;
1095 }
1096 }
1097 }
1098
1099 if (m_faceOffset[4] > -1) {
1100 #pragma omp for nowait
1101 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1102 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1103 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1104 *o++ = 0.;
1105 *o++ = 0.;
1106 *o = -1.;
1107 }
1108 }
1109 }
1110
1111 if (m_faceOffset[5] > -1) {
1112 #pragma omp for nowait
1113 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1114 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1115 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1116 *o++ = 0.;
1117 *o++ = 0.;
1118 *o = 1.;
1119 }
1120 }
1121 }
1122 } // end of parallel section
1123
1124 } else {
1125 stringstream msg;
1126 msg << "setToNormal: invalid function space type "
1127 << out.getFunctionSpace().getTypeCode();
1128 throw RipleyException(msg.str());
1129 }
1130 }
1131
1132 void Brick::setToSize(escript::Data& out) const
1133 {
1134 if (out.getFunctionSpace().getTypeCode() == Elements
1135 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1136 out.requireWrite();
1137 const dim_t numQuad=out.getNumDataPointsPerSample();
1138 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1139 #pragma omp parallel for
1140 for (index_t k = 0; k < getNumElements(); ++k) {
1141 double* o = out.getSampleDataRW(k);
1142 fill(o, o+numQuad, size);
1143 }
1144 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1145 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1146 out.requireWrite();
1147 const dim_t numQuad=out.getNumDataPointsPerSample();
1148 #pragma omp parallel
1149 {
1150 if (m_faceOffset[0] > -1) {
1151 const double size=min(m_dx[1],m_dx[2]);
1152 #pragma omp for nowait
1153 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1154 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1155 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1156 fill(o, o+numQuad, size);
1157 }
1158 }
1159 }
1160
1161 if (m_faceOffset[1] > -1) {
1162 const double size=min(m_dx[1],m_dx[2]);
1163 #pragma omp for nowait
1164 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1165 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1166 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1167 fill(o, o+numQuad, size);
1168 }
1169 }
1170 }
1171
1172 if (m_faceOffset[2] > -1) {
1173 const double size=min(m_dx[0],m_dx[2]);
1174 #pragma omp for nowait
1175 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1176 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1177 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1178 fill(o, o+numQuad, size);
1179 }
1180 }
1181 }
1182
1183 if (m_faceOffset[3] > -1) {
1184 const double size=min(m_dx[0],m_dx[2]);
1185 #pragma omp for nowait
1186 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1187 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1188 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1189 fill(o, o+numQuad, size);
1190 }
1191 }
1192 }
1193
1194 if (m_faceOffset[4] > -1) {
1195 const double size=min(m_dx[0],m_dx[1]);
1196 #pragma omp for nowait
1197 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1198 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1199 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1200 fill(o, o+numQuad, size);
1201 }
1202 }
1203 }
1204
1205 if (m_faceOffset[5] > -1) {
1206 const double size=min(m_dx[0],m_dx[1]);
1207 #pragma omp for nowait
1208 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1209 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1210 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1211 fill(o, o+numQuad, size);
1212 }
1213 }
1214 }
1215 } // end of parallel section
1216
1217 } else {
1218 stringstream msg;
1219 msg << "setToSize: invalid function space type "
1220 << out.getFunctionSpace().getTypeCode();
1221 throw RipleyException(msg.str());
1222 }
1223 }
1224
1225 void Brick::Print_Mesh_Info(const bool full) const
1226 {
1227 RipleyDomain::Print_Mesh_Info(full);
1228 if (full) {
1229 cout << " Id Coordinates" << endl;
1230 cout.precision(15);
1231 cout.setf(ios::scientific, ios::floatfield);
1232 for (index_t i=0; i < getNumNodes(); i++) {
1233 cout << " " << setw(5) << m_nodeId[i]
1234 << " " << getLocalCoordinate(i%m_NN[0], 0)
1235 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1236 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1237 }
1238 }
1239 }
1240
1241
1242 //protected
1243 void Brick::assembleCoordinates(escript::Data& arg) const
1244 {
1245 escriptDataC x = arg.getDataC();
1246 int numDim = m_numDim;
1247 if (!isDataPointShapeEqual(&x, 1, &numDim))
1248 throw RipleyException("setToX: Invalid Data object shape");
1249 if (!numSamplesEqual(&x, 1, getNumNodes()))
1250 throw RipleyException("setToX: Illegal number of samples in Data object");
1251
1252 arg.requireWrite();
1253 #pragma omp parallel for
1254 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1255 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1256 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1257 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1258 point[0] = getLocalCoordinate(i0, 0);
1259 point[1] = getLocalCoordinate(i1, 1);
1260 point[2] = getLocalCoordinate(i2, 2);
1261 }
1262 }
1263 }
1264 }
1265
1266 //protected
1267 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1268 {
1269 const dim_t numComp = in.getDataPointSize();
1270 const double C0 = .044658198738520451079;
1271 const double C1 = .16666666666666666667;
1272 const double C2 = .21132486540518711775;
1273 const double C3 = .25;
1274 const double C4 = .5;
1275 const double C5 = .62200846792814621559;
1276 const double C6 = .78867513459481288225;
1277
1278 if (out.getFunctionSpace().getTypeCode() == Elements) {
1279 out.requireWrite();
1280 #pragma omp parallel
1281 {
1282 vector<double> f_000(numComp);
1283 vector<double> f_001(numComp);
1284 vector<double> f_010(numComp);
1285 vector<double> f_011(numComp);
1286 vector<double> f_100(numComp);
1287 vector<double> f_101(numComp);
1288 vector<double> f_110(numComp);
1289 vector<double> f_111(numComp);
1290 #pragma omp for
1291 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1292 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1293 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1294 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1302 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1303 for (index_t i=0; i < numComp; ++i) {
1304 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1305 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1307 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1308 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1309 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1310 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1311 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1312 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1313 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1314 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1315 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1316 o[INDEX3(i,0,0,numComp,3)] = V0;
1317 o[INDEX3(i,1,0,numComp,3)] = V4;
1318 o[INDEX3(i,2,0,numComp,3)] = V8;
1319 o[INDEX3(i,0,1,numComp,3)] = V0;
1320 o[INDEX3(i,1,1,numComp,3)] = V5;
1321 o[INDEX3(i,2,1,numComp,3)] = V9;
1322 o[INDEX3(i,0,2,numComp,3)] = V1;
1323 o[INDEX3(i,1,2,numComp,3)] = V4;
1324 o[INDEX3(i,2,2,numComp,3)] = V10;
1325 o[INDEX3(i,0,3,numComp,3)] = V1;
1326 o[INDEX3(i,1,3,numComp,3)] = V5;
1327 o[INDEX3(i,2,3,numComp,3)] = V11;
1328 o[INDEX3(i,0,4,numComp,3)] = V2;
1329 o[INDEX3(i,1,4,numComp,3)] = V6;
1330 o[INDEX3(i,2,4,numComp,3)] = V8;
1331 o[INDEX3(i,0,5,numComp,3)] = V2;
1332 o[INDEX3(i,1,5,numComp,3)] = V7;
1333 o[INDEX3(i,2,5,numComp,3)] = V9;
1334 o[INDEX3(i,0,6,numComp,3)] = V3;
1335 o[INDEX3(i,1,6,numComp,3)] = V6;
1336 o[INDEX3(i,2,6,numComp,3)] = V10;
1337 o[INDEX3(i,0,7,numComp,3)] = V3;
1338 o[INDEX3(i,1,7,numComp,3)] = V7;
1339 o[INDEX3(i,2,7,numComp,3)] = V11;
1340 } // end of component loop i
1341 } // end of k0 loop
1342 } // end of k1 loop
1343 } // end of k2 loop
1344 } // end of parallel section
1345 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1346 out.requireWrite();
1347 #pragma omp parallel
1348 {
1349 vector<double> f_000(numComp);
1350 vector<double> f_001(numComp);
1351 vector<double> f_010(numComp);
1352 vector<double> f_011(numComp);
1353 vector<double> f_100(numComp);
1354 vector<double> f_101(numComp);
1355 vector<double> f_110(numComp);
1356 vector<double> f_111(numComp);
1357 #pragma omp for
1358 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1359 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1360 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1361 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1369 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1370 for (index_t i=0; i < numComp; ++i) {
1371 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1372 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1373 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1374 } // end of component loop i
1375 } // end of k0 loop
1376 } // end of k1 loop
1377 } // end of k2 loop
1378 } // end of parallel section
1379 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1380 out.requireWrite();
1381 #pragma omp parallel
1382 {
1383 vector<double> f_000(numComp);
1384 vector<double> f_001(numComp);
1385 vector<double> f_010(numComp);
1386 vector<double> f_011(numComp);
1387 vector<double> f_100(numComp);
1388 vector<double> f_101(numComp);
1389 vector<double> f_110(numComp);
1390 vector<double> f_111(numComp);
1391 if (m_faceOffset[0] > -1) {
1392 #pragma omp for nowait
1393 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1394 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1395 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1403 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1404 for (index_t i=0; i < numComp; ++i) {
1405 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1406 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1407 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1408 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1409 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1410 o[INDEX3(i,1,0,numComp,3)] = V0;
1411 o[INDEX3(i,2,0,numComp,3)] = V2;
1412 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1413 o[INDEX3(i,1,1,numComp,3)] = V0;
1414 o[INDEX3(i,2,1,numComp,3)] = V3;
1415 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1416 o[INDEX3(i,1,2,numComp,3)] = V1;
1417 o[INDEX3(i,2,2,numComp,3)] = V2;
1418 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1419 o[INDEX3(i,1,3,numComp,3)] = V1;
1420 o[INDEX3(i,2,3,numComp,3)] = V3;
1421 } // end of component loop i
1422 } // end of k1 loop
1423 } // end of k2 loop
1424 } // end of face 0
1425 if (m_faceOffset[1] > -1) {
1426 #pragma omp for nowait
1427 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1428 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1429 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1438 for (index_t i=0; i < numComp; ++i) {
1439 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1440 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1441 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1442 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1443 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1444 o[INDEX3(i,1,0,numComp,3)] = V0;
1445 o[INDEX3(i,2,0,numComp,3)] = V2;
1446 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1447 o[INDEX3(i,1,1,numComp,3)] = V0;
1448 o[INDEX3(i,2,1,numComp,3)] = V3;
1449 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1450 o[INDEX3(i,1,2,numComp,3)] = V1;
1451 o[INDEX3(i,2,2,numComp,3)] = V2;
1452 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1453 o[INDEX3(i,1,3,numComp,3)] = V1;
1454 o[INDEX3(i,2,3,numComp,3)] = V3;
1455 } // end of component loop i
1456 } // end of k1 loop
1457 } // end of k2 loop
1458 } // end of face 1
1459 if (m_faceOffset[2] > -1) {
1460 #pragma omp for nowait
1461 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1462 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1463 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1472 for (index_t i=0; i < numComp; ++i) {
1473 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1474 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1475 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1476 o[INDEX3(i,0,0,numComp,3)] = V0;
1477 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1478 o[INDEX3(i,2,0,numComp,3)] = V1;
1479 o[INDEX3(i,0,1,numComp,3)] = V0;
1480 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1481 o[INDEX3(i,2,1,numComp,3)] = V2;
1482 o[INDEX3(i,0,2,numComp,3)] = V0;
1483 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1484 o[INDEX3(i,2,2,numComp,3)] = V1;
1485 o[INDEX3(i,0,3,numComp,3)] = V0;
1486 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1487 o[INDEX3(i,2,3,numComp,3)] = V2;
1488 } // end of component loop i
1489 } // end of k0 loop
1490 } // end of k2 loop
1491 } // end of face 2
1492 if (m_faceOffset[3] > -1) {
1493 #pragma omp for nowait
1494 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1495 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1496 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1505 for (index_t i=0; i < numComp; ++i) {
1506 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1507 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1508 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1509 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1510 o[INDEX3(i,0,0,numComp,3)] = V0;
1511 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1512 o[INDEX3(i,2,0,numComp,3)] = V2;
1513 o[INDEX3(i,0,1,numComp,3)] = V0;
1514 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1515 o[INDEX3(i,2,1,numComp,3)] = V3;
1516 o[INDEX3(i,0,2,numComp,3)] = V1;
1517 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1518 o[INDEX3(i,2,2,numComp,3)] = V2;
1519 o[INDEX3(i,0,3,numComp,3)] = V1;
1520 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1521 o[INDEX3(i,2,3,numComp,3)] = V3;
1522 } // end of component loop i
1523 } // end of k0 loop
1524 } // end of k2 loop
1525 } // end of face 3
1526 if (m_faceOffset[4] > -1) {
1527 #pragma omp for nowait
1528 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1529 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1530 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1538 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1539 for (index_t i=0; i < numComp; ++i) {
1540 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1541 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1542 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1543 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1544 o[INDEX3(i,0,0,numComp,3)] = V0;
1545 o[INDEX3(i,1,0,numComp,3)] = V2;
1546 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1547 o[INDEX3(i,0,1,numComp,3)] = V0;
1548 o[INDEX3(i,1,1,numComp,3)] = V3;
1549 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1550 o[INDEX3(i,0,2,numComp,3)] = V1;
1551 o[INDEX3(i,1,2,numComp,3)] = V2;
1552 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1553 o[INDEX3(i,0,3,numComp,3)] = V1;
1554 o[INDEX3(i,1,3,numComp,3)] = V3;
1555 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1556 } // end of component loop i
1557 } // end of k0 loop
1558 } // end of k1 loop
1559 } // end of face 4
1560 if (m_faceOffset[5] > -1) {
1561 #pragma omp for nowait
1562 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1563 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1564 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1573 for (index_t i=0; i < numComp; ++i) {
1574 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1575 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1576 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1577 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1578 o[INDEX3(i,0,0,numComp,3)] = V0;
1579 o[INDEX3(i,1,0,numComp,3)] = V2;
1580 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1581 o[INDEX3(i,0,1,numComp,3)] = V0;
1582 o[INDEX3(i,1,1,numComp,3)] = V3;
1583 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1584 o[INDEX3(i,0,2,numComp,3)] = V1;
1585 o[INDEX3(i,1,2,numComp,3)] = V2;
1586 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1587 o[INDEX3(i,0,3,numComp,3)] = V1;
1588 o[INDEX3(i,1,3,numComp,3)] = V3;
1589 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1590 } // end of component loop i
1591 } // end of k0 loop
1592 } // end of k1 loop
1593 } // end of face 5
1594 } // end of parallel section
1595 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1596 out.requireWrite();
1597 #pragma omp parallel
1598 {
1599 vector<double> f_000(numComp);
1600 vector<double> f_001(numComp);
1601 vector<double> f_010(numComp);
1602 vector<double> f_011(numComp);
1603 vector<double> f_100(numComp);
1604 vector<double> f_101(numComp);
1605 vector<double> f_110(numComp);
1606 vector<double> f_111(numComp);
1607 if (m_faceOffset[0] > -1) {
1608 #pragma omp for nowait
1609 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1610 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1611 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1620 for (index_t i=0; i < numComp; ++i) {
1621 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1622 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1623 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1624 } // end of component loop i
1625 } // end of k1 loop
1626 } // end of k2 loop
1627 } // end of face 0
1628 if (m_faceOffset[1] > -1) {
1629 #pragma omp for nowait
1630 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1631 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1632 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1633 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1634 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1635 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1636 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1637 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1638 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1639 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1640 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1641 for (index_t i=0; i < numComp; ++i) {
1642 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1643 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1644 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1645 } // end of component loop i
1646 } // end of k1 loop
1647 } // end of k2 loop
1648 } // end of face 1
1649 if (m_faceOffset[2] > -1) {
1650 #pragma omp for nowait
1651 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1652 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1653 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1654 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1655 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1656 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1657 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1658 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1659 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1660 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1661 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1662 for (index_t i=0; i < numComp; ++i) {
1663 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1664 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1665 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1666 } // end of component loop i
1667 } // end of k0 loop
1668 } // end of k2 loop
1669 } // end of face 2
1670 if (m_faceOffset[3] > -1) {
1671 #pragma omp for nowait
1672 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1673 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1674 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1675 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1676 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1677 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1678 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1679 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1680 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1681 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1682 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1683 for (index_t i=0; i < numComp; ++i) {
1684 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1685 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1686 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1687 } // end of component loop i
1688 } // end of k0 loop
1689 } // end of k2 loop
1690 } // end of face 3
1691 if (m_faceOffset[4] > -1) {
1692 #pragma omp for nowait
1693 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1694 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1695 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1696 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1697 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1698 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1699 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1700 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1701 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1702 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1703 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1704 for (index_t i=0; i < numComp; ++i) {
1705 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1706 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1707 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1708 } // end of component loop i
1709 } // end of k0 loop
1710 } // end of k1 loop
1711 } // end of face 4
1712 if (m_faceOffset[5] > -1) {
1713 #pragma omp for nowait
1714 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1715 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1716 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1717 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1718 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1721 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1722 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1723 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1724 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1725 for (index_t i=0; i < numComp; ++i) {
1726 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1727 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1728 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1729 } // end of component loop i
1730 } // end of k0 loop
1731 } // end of k1 loop
1732 } // end of face 5
1733 } // end of parallel section
1734 }
1735 }
1736
1737 //protected
1738 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1739 {
1740 const dim_t numComp = arg.getDataPointSize();
1741 const index_t left = (m_offset[0]==0 ? 0 : 1);
1742 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1743 const index_t front = (m_offset[2]==0 ? 0 : 1);
1744 const int fs = arg.getFunctionSpace().getTypeCode();
1745 if (fs == Elements && arg.actsExpanded()) {
1746 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1747 #pragma omp parallel
1748 {
1749 vector<double> int_local(numComp, 0);
1750 #pragma omp for nowait
1751 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1752 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1753 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1754 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1755 for (index_t i=0; i < numComp; ++i) {
1756 const double f_0 = f[INDEX2(i,0,numComp)];
1757 const double f_1 = f[INDEX2(i,1,numComp)];
1758 const double f_2 = f[INDEX2(i,2,numComp)];
1759 const double f_3 = f[INDEX2(i,3,numComp)];
1760 const double f_4 = f[INDEX2(i,4,numComp)];
1761 const double f_5 = f[INDEX2(i,5,numComp)];
1762 const double f_6 = f[INDEX2(i,6,numComp)];
1763 const double f_7 = f[INDEX2(i,7,numComp)];
1764 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1765 } // end of component loop i
1766 } // end of k0 loop
1767 } // end of k1 loop
1768 } // end of k2 loop
1769
1770 #pragma omp critical
1771 for (index_t i=0; i<numComp; i++)
1772 integrals[i]+=int_local[i];
1773 } // end of parallel section
1774
1775 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1776 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1777 #pragma omp parallel
1778 {
1779 vector<double> int_local(numComp, 0);
1780 #pragma omp for nowait
1781 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1782 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1783 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1784 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1785 for (index_t i=0; i < numComp; ++i) {
1786 int_local[i]+=f[i]*w_0;
1787 } // end of component loop i
1788 } // end of k0 loop
1789 } // end of k1 loop
1790 } // end of k2 loop
1791
1792 #pragma omp critical
1793 for (index_t i=0; i<numComp; i++)
1794 integrals[i]+=int_local[i];
1795 } // end of parallel section
1796
1797 } else if (fs == FaceElements && arg.actsExpanded()) {
1798 const double w_0 = m_dx[1]*m_dx[2]/4.;
1799 const double w_1 = m_dx[0]*m_dx[2]/4.;
1800 const double w_2 = m_dx[0]*m_dx[1]/4.;
1801 #pragma omp parallel
1802 {
1803 vector<double> int_local(numComp, 0);
1804 if (m_faceOffset[0] > -1) {
1805 #pragma omp for nowait
1806 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1807 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1808 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1809 for (index_t i=0; i < numComp; ++i) {
1810 const double f_0 = f[INDEX2(i,0,numComp)];
1811 const double f_1 = f[INDEX2(i,1,numComp)];
1812 const double f_2 = f[INDEX2(i,2,numComp)];
1813 const double f_3 = f[INDEX2(i,3,numComp)];
1814 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1815 } // end of component loop i