/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5136 - (show annotations)
Tue Sep 9 07:13:55 2014 UTC (4 years, 5 months ago) by caltinay
File size: 169249 byte(s)
ripley now supports paso solvers again and returns an appropriate matrix type
id. Changed the getSystemMatrixTypeId() method to take a full SolverBuddy
instance and made some other simplifications.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <ripley/DefaultAssembler3D.h>
19 #include <ripley/LameAssembler3D.h>
20 #include <ripley/WaveAssembler3D.h>
21 #include <ripley/blocktools.h>
22 #include <ripley/domainhelpers.h>
23 #include <esysUtils/esysFileWriter.h>
24 #include <esysUtils/EsysRandom.h>
25 #include <paso/SystemMatrix.h>
26
27 #include <boost/scoped_array.hpp>
28
29 #ifdef USE_NETCDF
30 #include <netcdfcpp.h>
31 #endif
32
33 #if USE_SILO
34 #include <silo.h>
35 #ifdef ESYS_MPI
36 #include <pmpio.h>
37 #endif
38 #endif
39
40 #include <iomanip>
41 #include <limits>
42
43 namespace bp = boost::python;
44 using namespace std;
45 using esysUtils::FileWriter;
46 using escript::AbstractSystemMatrix;
47
48 namespace ripley {
49
50 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
51 {
52 if (a > b) {
53 if (c > a) {
54 return 2;
55 }
56 return 0;
57 } else if (b > c) {
58 return 1;
59 }
60 return 2;
61 }
62
63 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
64 double x1, double y1, double z1, int d0, int d1, int d2,
65 const vector<double>& points, const vector<int>& tags,
66 const TagMap& tagnamestonums,
67 escript::SubWorld_ptr w) :
68 RipleyDomain(3, w)
69 {
70 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
71 * static_cast<long>(n2 + 1) > numeric_limits<int>::max())
72 throw RipleyException("The number of elements has overflowed, this "
73 "limit may be raised in future releases.");
74
75 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
76 throw RipleyException("Number of elements in each spatial dimension "
77 "must be positive");
78
79 // ignore subdivision parameters for serial run
80 if (m_mpiInfo->size == 1) {
81 d0=1;
82 d1=1;
83 d2=1;
84 }
85 bool warn=false;
86
87 vector<int> factors;
88 int ranks = m_mpiInfo->size;
89 dim_t epr[3] = {n0,n1,n2};
90 int d[3] = {d0,d1,d2};
91 if (d0<=0 || d1<=0 || d2<=0) {
92 for (int i = 0; i < 3; i++) {
93 if (d[i] < 1) {
94 d[i] = 1;
95 continue;
96 }
97 epr[i] = -1; // can no longer be max
98 if (ranks % d[i] != 0) {
99 throw RipleyException("Invalid number of spatial subdivisions");
100 }
101 //remove
102 ranks /= d[i];
103 }
104 factorise(factors, ranks);
105 if (factors.size() != 0) {
106 warn = true;
107 }
108 }
109 while (factors.size() > 0) {
110 int i = indexOfMax(epr[0],epr[1],epr[2]);
111 int f = factors.back();
112 factors.pop_back();
113 d[i] *= f;
114 epr[i] /= f;
115 }
116 d0 = d[0]; d1 = d[1]; d2 = d[2];
117
118 // ensure number of subdivisions is valid and nodes can be distributed
119 // among number of ranks
120 if (d0*d1*d2 != m_mpiInfo->size){
121 throw RipleyException("Invalid number of spatial subdivisions");
122 }
123 if (warn) {
124 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
125 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
126 }
127
128 double l0 = x1-x0;
129 double l1 = y1-y0;
130 double l2 = z1-z0;
131 m_dx[0] = l0/n0;
132 m_dx[1] = l1/n1;
133 m_dx[2] = l2/n2;
134
135 if ((n0+1)%d0 > 0) {
136 n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
137 l0=m_dx[0]*n0;
138 cout << "Warning: Adjusted number of elements and length. N0="
139 << n0 << ", l0=" << l0 << endl;
140 }
141 if ((n1+1)%d1 > 0) {
142 n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
143 l1=m_dx[1]*n1;
144 cout << "Warning: Adjusted number of elements and length. N1="
145 << n1 << ", l1=" << l1 << endl;
146 }
147 if ((n2+1)%d2 > 0) {
148 n2=(dim_t)round((float)(n2+1)/d2+0.5)*d2-1;
149 l2=m_dx[2]*n2;
150 cout << "Warning: Adjusted number of elements and length. N2="
151 << n2 << ", l2=" << l2 << endl;
152 }
153
154 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
155 throw RipleyException("Too few elements for the number of ranks");
156
157 m_gNE[0] = n0;
158 m_gNE[1] = n1;
159 m_gNE[2] = n2;
160 m_origin[0] = x0;
161 m_origin[1] = y0;
162 m_origin[2] = z0;
163 m_length[0] = l0;
164 m_length[1] = l1;
165 m_length[2] = l2;
166 m_NX[0] = d0;
167 m_NX[1] = d1;
168 m_NX[2] = d2;
169
170 // local number of elements (including overlap)
171 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
172 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
173 m_NE[0]++;
174 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
175 m_ownNE[0]--;
176
177 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
178 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
179 m_NE[1]++;
180 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
181 m_ownNE[1]--;
182
183 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
184 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
185 m_NE[2]++;
186 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
187 m_ownNE[2]--;
188
189 // local number of nodes
190 m_NN[0] = m_NE[0]+1;
191 m_NN[1] = m_NE[1]+1;
192 m_NN[2] = m_NE[2]+1;
193
194 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
195 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
196 if (m_offset[0] > 0)
197 m_offset[0]--;
198 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
199 if (m_offset[1] > 0)
200 m_offset[1]--;
201 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
202 if (m_offset[2] > 0)
203 m_offset[2]--;
204
205 populateSampleIds();
206
207 for (TagMap::const_iterator i = tagnamestonums.begin();
208 i != tagnamestonums.end(); i++) {
209 setTagMap(i->first, i->second);
210 }
211 addPoints(points, tags);
212 }
213
214 Brick::~Brick()
215 {
216 }
217
218 string Brick::getDescription() const
219 {
220 return "ripley::Brick";
221 }
222
223 bool Brick::operator==(const AbstractDomain& other) const
224 {
225 const Brick* o=dynamic_cast<const Brick*>(&other);
226 if (o) {
227 return (RipleyDomain::operator==(other) &&
228 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
229 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
230 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
231 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
232 }
233
234 return false;
235 }
236
237 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
238 const ReaderParameters& params) const
239 {
240 #ifdef USE_NETCDF
241 // check destination function space
242 dim_t myN0, myN1, myN2;
243 if (out.getFunctionSpace().getTypeCode() == Nodes) {
244 myN0 = m_NN[0];
245 myN1 = m_NN[1];
246 myN2 = m_NN[2];
247 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
248 out.getFunctionSpace().getTypeCode() == ReducedElements) {
249 myN0 = m_NE[0];
250 myN1 = m_NE[1];
251 myN2 = m_NE[2];
252 } else
253 throw RipleyException("readNcGrid(): invalid function space for output data object");
254
255 if (params.first.size() != 3)
256 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
257
258 if (params.numValues.size() != 3)
259 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
260
261 if (params.multiplier.size() != 3)
262 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
263 for (size_t i=0; i<params.multiplier.size(); i++)
264 if (params.multiplier[i]<1)
265 throw RipleyException("readNcGrid(): all multipliers must be positive");
266
267 // check file existence and size
268 NcFile f(filename.c_str(), NcFile::ReadOnly);
269 if (!f.is_valid())
270 throw RipleyException("readNcGrid(): cannot open file");
271
272 NcVar* var = f.get_var(varname.c_str());
273 if (!var)
274 throw RipleyException("readNcGrid(): invalid variable name");
275
276 // TODO: rank>0 data support
277 const int numComp = out.getDataPointSize();
278 if (numComp > 1)
279 throw RipleyException("readNcGrid(): only scalar data supported");
280
281 const int dims = var->num_dims();
282 boost::scoped_array<long> edges(var->edges());
283
284 // is this a slice of the data object (dims!=3)?
285 // note the expected ordering of edges (as in numpy: z,y,x)
286 if ( (dims==3 && (params.numValues[2] > edges[0] ||
287 params.numValues[1] > edges[1] ||
288 params.numValues[0] > edges[2]))
289 || (dims==2 && params.numValues[2]>1)
290 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
291 throw RipleyException("readNcGrid(): not enough data in file");
292 }
293
294 // check if this rank contributes anything
295 if (params.first[0] >= m_offset[0]+myN0 ||
296 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
297 params.first[1] >= m_offset[1]+myN1 ||
298 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
299 params.first[2] >= m_offset[2]+myN2 ||
300 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
301 return;
302 }
303
304 // now determine how much this rank has to write
305
306 // first coordinates in data object to write to
307 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
308 const dim_t first1 = max(0, params.first[1]-m_offset[1]);
309 const dim_t first2 = max(0, params.first[2]-m_offset[2]);
310 // indices to first value in file (not accounting for reverse yet)
311 dim_t idx0 = max(0, m_offset[0]-params.first[0]);
312 dim_t idx1 = max(0, m_offset[1]-params.first[1]);
313 dim_t idx2 = max(0, m_offset[2]-params.first[2]);
314 // number of values to read
315 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
316 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
317 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
318
319 // make sure we read the right block if going backwards through file
320 if (params.reverse[0])
321 idx0 = edges[dims-1]-num0-idx0;
322 if (dims>1 && params.reverse[1])
323 idx1 = edges[dims-2]-num1-idx1;
324 if (dims>2 && params.reverse[2])
325 idx2 = edges[dims-3]-num2-idx2;
326
327
328 vector<double> values(num0*num1*num2);
329 if (dims==3) {
330 var->set_cur(idx2, idx1, idx0);
331 var->get(&values[0], num2, num1, num0);
332 } else if (dims==2) {
333 var->set_cur(idx1, idx0);
334 var->get(&values[0], num1, num0);
335 } else {
336 var->set_cur(idx0);
337 var->get(&values[0], num0);
338 }
339
340 const int dpp = out.getNumDataPointsPerSample();
341 out.requireWrite();
342
343 // helpers for reversing
344 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
345 const int x_mult = (params.reverse[0] ? -1 : 1);
346 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
347 const int y_mult = (params.reverse[1] ? -1 : 1);
348 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
349 const int z_mult = (params.reverse[2] ? -1 : 1);
350
351 for (index_t z=0; z<num2; z++) {
352 for (index_t y=0; y<num1; y++) {
353 #pragma omp parallel for
354 for (index_t x=0; x<num0; x++) {
355 const dim_t baseIndex = first0+x*params.multiplier[0]
356 +(first1+y*params.multiplier[1])*myN0
357 +(first2+z*params.multiplier[2])*myN0*myN1;
358 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
359 +(y0+y_mult*y)*num0
360 +(x0+x_mult*x);
361 if (!isnan(values[srcIndex])) {
362 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
363 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
364 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
365 const dim_t dataIndex = baseIndex+m0
366 +m1*myN0
367 +m2*myN0*myN1;
368 double* dest = out.getSampleDataRW(dataIndex);
369 for (index_t q=0; q<dpp; q++) {
370 *dest++ = values[srcIndex];
371 }
372 }
373 }
374 }
375 }
376 }
377 }
378 }
379 #else
380 throw RipleyException("readNcGrid(): not compiled with netCDF support");
381 #endif
382 }
383
384 #ifdef USE_BOOSTIO
385 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
386 const ReaderParameters& params) const
387 {
388 // the mapping is not universally correct but should work on our
389 // supported platforms
390 switch (params.dataType) {
391 case DATATYPE_INT32:
392 readBinaryGridZippedImpl<int>(out, filename, params);
393 break;
394 case DATATYPE_FLOAT32:
395 readBinaryGridZippedImpl<float>(out, filename, params);
396 break;
397 case DATATYPE_FLOAT64:
398 readBinaryGridZippedImpl<double>(out, filename, params);
399 break;
400 default:
401 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
402 }
403 }
404 #endif
405
406 void Brick::readBinaryGrid(escript::Data& out, string filename,
407 const ReaderParameters& params) const
408 {
409 // the mapping is not universally correct but should work on our
410 // supported platforms
411 switch (params.dataType) {
412 case DATATYPE_INT32:
413 readBinaryGridImpl<int>(out, filename, params);
414 break;
415 case DATATYPE_FLOAT32:
416 readBinaryGridImpl<float>(out, filename, params);
417 break;
418 case DATATYPE_FLOAT64:
419 readBinaryGridImpl<double>(out, filename, params);
420 break;
421 default:
422 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
423 }
424 }
425
426 template<typename ValueType>
427 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
428 const ReaderParameters& params) const
429 {
430 // check destination function space
431 dim_t myN0, myN1, myN2;
432 if (out.getFunctionSpace().getTypeCode() == Nodes) {
433 myN0 = m_NN[0];
434 myN1 = m_NN[1];
435 myN2 = m_NN[2];
436 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
437 out.getFunctionSpace().getTypeCode() == ReducedElements) {
438 myN0 = m_NE[0];
439 myN1 = m_NE[1];
440 myN2 = m_NE[2];
441 } else
442 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
443
444 if (params.first.size() != 3)
445 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
446
447 if (params.numValues.size() != 3)
448 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
449
450 if (params.multiplier.size() != 3)
451 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
452 for (size_t i=0; i<params.multiplier.size(); i++)
453 if (params.multiplier[i]<1)
454 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
455 if (params.reverse[0] != 0 || params.reverse[1] != 0)
456 throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
457
458 // check file existence and size
459 ifstream f(filename.c_str(), ifstream::binary);
460 if (f.fail()) {
461 throw RipleyException("readBinaryGrid(): cannot open file");
462 }
463 f.seekg(0, ios::end);
464 const int numComp = out.getDataPointSize();
465 const dim_t filesize = f.tellg();
466 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 if (filesize < reqsize) {
468 f.close();
469 throw RipleyException("readBinaryGrid(): not enough data in file");
470 }
471
472 // check if this rank contributes anything
473 if (params.first[0] >= m_offset[0]+myN0 ||
474 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475 params.first[1] >= m_offset[1]+myN1 ||
476 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477 params.first[2] >= m_offset[2]+myN2 ||
478 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 f.close();
480 return;
481 }
482
483 // now determine how much this rank has to write
484
485 // first coordinates in data object to write to
486 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
487 const dim_t first1 = max(0, params.first[1]-m_offset[1]);
488 const dim_t first2 = max(0, params.first[2]-m_offset[2]);
489 // indices to first value in file (not accounting for reverse yet)
490 dim_t idx0 = max(0, (m_offset[0]/params.multiplier[0])-params.first[0]);
491 dim_t idx1 = max(0, (m_offset[1]/params.multiplier[1])-params.first[1]);
492 dim_t idx2 = max(0, (m_offset[2]/params.multiplier[2])-params.first[2]);
493 // if restX > 0 the first value in the respective dimension has been
494 // written restX times already in a previous rank so this rank only
495 // contributes (multiplier-rank) copies of that value
496 const dim_t rest0 = m_offset[0]%params.multiplier[0];
497 const dim_t rest1 = m_offset[1]%params.multiplier[1];
498 const dim_t rest2 = m_offset[2]%params.multiplier[2];
499
500 // number of values to read
501 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
502 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
503 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
504
505 // make sure we read the right block if going backwards through file
506 if (params.reverse[2])
507 idx2 = params.numValues[2]-idx2-1;
508
509 // helpers for reversing
510 const int z_mult = (params.reverse[2] ? -1 : 1);
511
512 out.requireWrite();
513 vector<ValueType> values(num0*numComp);
514 const int dpp = out.getNumDataPointsPerSample();
515
516 for (dim_t z=0; z<num2; z++) {
517 const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
518 dim_t dataZbase = first2 + z*params.multiplier[2];
519 if (z>0)
520 dataZbase -= rest2;
521
522 for (dim_t y=0; y<num1; y++) {
523 const dim_t fileofs = numComp*(idx0 +
524 (idx1+y)*params.numValues[0] +
525 (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
526 f.seekg(fileofs*sizeof(ValueType));
527 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
528 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
529 dim_t dataYbase = first1 + y*params.multiplier[1];
530 if (y>0)
531 dataYbase -= rest1;
532
533 for (dim_t x=0; x<num0; x++) {
534 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
535 dim_t dataXbase = first0 + x*params.multiplier[0];
536 if (x>0)
537 dataXbase -= rest0;
538 // write a block of mult0 x mult1 x mult2 identical values into
539 // Data object
540 for (dim_t m2=0; m2 < m2limit; m2++) {
541 const dim_t dataZ = dataZbase + m2;
542 if (dataZ >= myN2)
543 break;
544 for (dim_t m1=0; m1 < m1limit; m1++) {
545 const dim_t dataY = dataYbase + m1;
546 if (dataY >= myN1)
547 break;
548 for (dim_t m0=0; m0 < m0limit; m0++) {
549 const dim_t dataX = dataXbase + m0;
550 if (dataX >= myN0)
551 break;
552 const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
553 double* dest = out.getSampleDataRW(dataIndex);
554 for (int c=0; c<numComp; c++) {
555 ValueType val = values[x*numComp+c];
556
557 if (params.byteOrder != BYTEORDER_NATIVE) {
558 char* cval = reinterpret_cast<char*>(&val);
559 // this will alter val!!
560 if (sizeof(ValueType)>4) {
561 byte_swap64(cval);
562 } else {
563 byte_swap32(cval);
564 }
565 }
566 if (!isnan(val)) {
567 for (int q=0; q<dpp; q++) {
568 *dest++ = static_cast<double>(val);
569 }
570 }
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578
579 f.close();
580 }
581
582 #ifdef USE_BOOSTIO
583 template<typename ValueType>
584 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
585 const ReaderParameters& params) const
586 {
587 // check destination function space
588 dim_t myN0, myN1, myN2;
589 if (out.getFunctionSpace().getTypeCode() == Nodes) {
590 myN0 = m_NN[0];
591 myN1 = m_NN[1];
592 myN2 = m_NN[2];
593 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
594 out.getFunctionSpace().getTypeCode() == ReducedElements) {
595 myN0 = m_NE[0];
596 myN1 = m_NE[1];
597 myN2 = m_NE[2];
598 } else
599 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
600
601 if (params.first.size() != 3)
602 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
603
604 if (params.numValues.size() != 3)
605 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
606
607 if (params.multiplier.size() != 3)
608 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
609 for (size_t i=0; i<params.multiplier.size(); i++)
610 if (params.multiplier[i]<1)
611 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
612
613 // check file existence and size
614 ifstream f(filename.c_str(), ifstream::binary);
615 if (f.fail()) {
616 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
617 }
618 f.seekg(0, ios::end);
619 const int numComp = out.getDataPointSize();
620 dim_t filesize = f.tellg();
621 f.seekg(0, ios::beg);
622 vector<char> compressed(filesize);
623 f.read((char*)&compressed[0], filesize);
624 f.close();
625 vector<char> decompressed = unzip(compressed);
626 filesize = decompressed.size();
627 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
628 if (filesize < reqsize) {
629 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
630 }
631
632 // check if this rank contributes anything
633 if (params.first[0] >= m_offset[0]+myN0 ||
634 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
635 params.first[1] >= m_offset[1]+myN1 ||
636 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
637 params.first[2] >= m_offset[2]+myN2 ||
638 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
639 return;
640 }
641
642 // now determine how much this rank has to write
643
644 // first coordinates in data object to write to
645 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
646 const dim_t first1 = max(0, params.first[1]-m_offset[1]);
647 const dim_t first2 = max(0, params.first[2]-m_offset[2]);
648 // indices to first value in file
649 const dim_t idx0 = max(0, m_offset[0]-params.first[0]);
650 const dim_t idx1 = max(0, m_offset[1]-params.first[1]);
651 const dim_t idx2 = max(0, m_offset[2]-params.first[2]);
652 // number of values to read
653 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
654 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
655 const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
656
657 out.requireWrite();
658 vector<ValueType> values(num0*numComp);
659 const int dpp = out.getNumDataPointsPerSample();
660
661 for (dim_t z=0; z<num2; z++) {
662 for (dim_t y=0; y<num1; y++) {
663 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
664 +(idx2+z)*params.numValues[0]*params.numValues[1]);
665 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
666
667 for (dim_t x=0; x<num0; x++) {
668 const dim_t baseIndex = first0+x*params.multiplier[0]
669 +(first1+y*params.multiplier[1])*myN0
670 +(first2+z*params.multiplier[2])*myN0*myN1;
671 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
672 for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
673 for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
674 const dim_t dataIndex = baseIndex+m0
675 +m1*myN0
676 +m2*myN0*myN1;
677 double* dest = out.getSampleDataRW(dataIndex);
678 for (int c=0; c<numComp; c++) {
679 ValueType val = values[x*numComp+c];
680
681 if (params.byteOrder != BYTEORDER_NATIVE) {
682 char* cval = reinterpret_cast<char*>(&val);
683 // this will alter val!!
684 byte_swap32(cval);
685 }
686 if (!isnan(val)) {
687 for (int q=0; q<dpp; q++) {
688 *dest++ = static_cast<double>(val);
689 }
690 }
691 }
692 }
693 }
694 }
695 }
696 }
697 }
698 }
699 #endif
700
701 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
702 int byteOrder, int dataType) const
703 {
704 // the mapping is not universally correct but should work on our
705 // supported platforms
706 switch (dataType) {
707 case DATATYPE_INT32:
708 writeBinaryGridImpl<int>(in, filename, byteOrder);
709 break;
710 case DATATYPE_FLOAT32:
711 writeBinaryGridImpl<float>(in, filename, byteOrder);
712 break;
713 case DATATYPE_FLOAT64:
714 writeBinaryGridImpl<double>(in, filename, byteOrder);
715 break;
716 default:
717 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
718 }
719 }
720
721 template<typename ValueType>
722 void Brick::writeBinaryGridImpl(const escript::Data& in,
723 const string& filename, int byteOrder) const
724 {
725 // check function space and determine number of points
726 dim_t myN0, myN1, myN2;
727 dim_t totalN0, totalN1, totalN2;
728 if (in.getFunctionSpace().getTypeCode() == Nodes) {
729 myN0 = m_NN[0];
730 myN1 = m_NN[1];
731 myN2 = m_NN[2];
732 totalN0 = m_gNE[0]+1;
733 totalN1 = m_gNE[1]+1;
734 totalN2 = m_gNE[2]+1;
735 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
736 in.getFunctionSpace().getTypeCode() == ReducedElements) {
737 myN0 = m_NE[0];
738 myN1 = m_NE[1];
739 myN2 = m_NE[2];
740 totalN0 = m_gNE[0];
741 totalN1 = m_gNE[1];
742 totalN2 = m_gNE[2];
743 } else
744 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
745
746 const int numComp = in.getDataPointSize();
747 const int dpp = in.getNumDataPointsPerSample();
748 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
749
750 if (numComp > 1 || dpp > 1)
751 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
752
753 // from here on we know that each sample consists of one value
754 FileWriter fw;
755 fw.openFile(filename, fileSize);
756 MPIBarrier();
757
758 for (index_t z=0; z<myN2; z++) {
759 for (index_t y=0; y<myN1; y++) {
760 const dim_t fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
761 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
762 ostringstream oss;
763
764 for (index_t x=0; x<myN0; x++) {
765 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
766 ValueType fvalue = static_cast<ValueType>(*sample);
767 if (byteOrder == BYTEORDER_NATIVE) {
768 oss.write((char*)&fvalue, sizeof(fvalue));
769 } else {
770 char* value = reinterpret_cast<char*>(&fvalue);
771 if (sizeof(fvalue)>4) {
772 byte_swap64(value);
773 } else {
774 byte_swap32(value);
775 }
776 oss.write(value, sizeof(fvalue));
777 }
778 }
779 fw.writeAt(oss, fileofs);
780 }
781 }
782 fw.close();
783 }
784
785 void Brick::write(const std::string& filename) const
786 {
787 throw RipleyException("write: not supported");
788 }
789
790 void Brick::dump(const string& fileName) const
791 {
792 #if USE_SILO
793 string fn(fileName);
794 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
795 fn+=".silo";
796 }
797
798 int driver=DB_HDF5;
799 string siloPath;
800 DBfile* dbfile = NULL;
801
802 #ifdef ESYS_MPI
803 PMPIO_baton_t* baton = NULL;
804 const int NUM_SILO_FILES = 1;
805 const char* blockDirFmt = "/block%04d";
806 #endif
807
808 if (m_mpiInfo->size > 1) {
809 #ifdef ESYS_MPI
810 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
811 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
812 PMPIO_DefaultClose, (void*)&driver);
813 // try the fallback driver in case of error
814 if (!baton && driver != DB_PDB) {
815 driver = DB_PDB;
816 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
817 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
818 PMPIO_DefaultClose, (void*)&driver);
819 }
820 if (baton) {
821 char str[64];
822 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
823 siloPath = str;
824 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
825 }
826 #endif
827 } else {
828 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
829 getDescription().c_str(), driver);
830 // try the fallback driver in case of error
831 if (!dbfile && driver != DB_PDB) {
832 driver = DB_PDB;
833 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
834 getDescription().c_str(), driver);
835 }
836 }
837
838 if (!dbfile)
839 throw RipleyException("dump: Could not create Silo file");
840
841 /*
842 if (driver==DB_HDF5) {
843 // gzip level 1 already provides good compression with minimal
844 // performance penalty. Some tests showed that gzip levels >3 performed
845 // rather badly on escript data both in terms of time and space
846 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
847 }
848 */
849
850 boost::scoped_ptr<double> x(new double[m_NN[0]]);
851 boost::scoped_ptr<double> y(new double[m_NN[1]]);
852 boost::scoped_ptr<double> z(new double[m_NN[2]]);
853 double* coords[3] = { x.get(), y.get(), z.get() };
854 const dim_t NN0 = m_NN[0];
855 const dim_t NN1 = m_NN[1];
856 const dim_t NN2 = m_NN[2];
857
858 #pragma omp parallel
859 {
860 #pragma omp for
861 for (dim_t i0 = 0; i0 < NN0; i0++) {
862 coords[0][i0]=getLocalCoordinate(i0, 0);
863 }
864 #pragma omp for
865 for (dim_t i1 = 0; i1 < NN1; i1++) {
866 coords[1][i1]=getLocalCoordinate(i1, 1);
867 }
868 #pragma omp for
869 for (dim_t i2 = 0; i2 < NN2; i2++) {
870 coords[2][i2]=getLocalCoordinate(i2, 2);
871 }
872 }
873 dim_t* dims = const_cast<dim_t*>(getNumNodesPerDim());
874
875 // write mesh
876 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
877 DB_COLLINEAR, NULL);
878
879 // write node ids
880 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
881 NULL, 0, DB_INT, DB_NODECENT, NULL);
882
883 // write element ids
884 dims = const_cast<dim_t*>(getNumElementsPerDim());
885 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
886 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
887
888 // rank 0 writes multimesh and multivar
889 if (m_mpiInfo->rank == 0) {
890 vector<string> tempstrings;
891 vector<char*> names;
892 for (dim_t i=0; i<m_mpiInfo->size; i++) {
893 stringstream path;
894 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
895 tempstrings.push_back(path.str());
896 names.push_back((char*)tempstrings.back().c_str());
897 }
898 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
899 DBSetDir(dbfile, "/");
900 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
901 &types[0], NULL);
902 tempstrings.clear();
903 names.clear();
904 for (dim_t i=0; i<m_mpiInfo->size; i++) {
905 stringstream path;
906 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
907 tempstrings.push_back(path.str());
908 names.push_back((char*)tempstrings.back().c_str());
909 }
910 types.assign(m_mpiInfo->size, DB_QUADVAR);
911 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
912 &types[0], NULL);
913 tempstrings.clear();
914 names.clear();
915 for (dim_t i=0; i<m_mpiInfo->size; i++) {
916 stringstream path;
917 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
918 tempstrings.push_back(path.str());
919 names.push_back((char*)tempstrings.back().c_str());
920 }
921 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
922 &types[0], NULL);
923 }
924
925 if (m_mpiInfo->size > 1) {
926 #ifdef ESYS_MPI
927 PMPIO_HandOffBaton(baton, dbfile);
928 PMPIO_Finish(baton);
929 #endif
930 } else {
931 DBClose(dbfile);
932 }
933
934 #else // USE_SILO
935 throw RipleyException("dump: no Silo support");
936 #endif
937 }
938
939 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
940 {
941 switch (fsType) {
942 case Nodes:
943 case ReducedNodes: //FIXME: reduced
944 return &m_nodeId[0];
945 case DegreesOfFreedom:
946 case ReducedDegreesOfFreedom: //FIXME: reduced
947 return &m_dofId[0];
948 case Elements:
949 case ReducedElements:
950 return &m_elementId[0];
951 case FaceElements:
952 case ReducedFaceElements:
953 return &m_faceId[0];
954 case Points:
955 return &m_diracPointNodeIDs[0];
956 default:
957 break;
958 }
959
960 stringstream msg;
961 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
962 throw RipleyException(msg.str());
963 }
964
965 bool Brick::ownSample(int fsType, index_t id) const
966 {
967 if (getMPISize()==1)
968 return true;
969
970 switch (fsType) {
971 case Nodes:
972 case ReducedNodes: //FIXME: reduced
973 return (m_dofMap[id] < getNumDOF());
974 case DegreesOfFreedom:
975 case ReducedDegreesOfFreedom:
976 return true;
977 case Elements:
978 case ReducedElements:
979 {
980 // check ownership of element's _last_ node
981 const index_t x=id%m_NE[0] + 1;
982 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
983 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
984 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
985 }
986 case FaceElements:
987 case ReducedFaceElements:
988 {
989 // check ownership of face element's last node
990 dim_t n=0;
991 for (size_t i=0; i<6; i++) {
992 n+=m_faceCount[i];
993 if (id<n) {
994 const index_t j=id-n+m_faceCount[i];
995 if (i>=4) { // front or back
996 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
997 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
998 } else if (i>=2) { // bottom or top
999 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1000 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1001 } else { // left or right
1002 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1003 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1004 }
1005 }
1006 }
1007 return false;
1008 }
1009 default:
1010 break;
1011 }
1012
1013 stringstream msg;
1014 msg << "ownSample: invalid function space type " << fsType;
1015 throw RipleyException(msg.str());
1016 }
1017
1018 void Brick::setToNormal(escript::Data& out) const
1019 {
1020 const dim_t NE0 = m_NE[0];
1021 const dim_t NE1 = m_NE[1];
1022 const dim_t NE2 = m_NE[2];
1023
1024 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1025 out.requireWrite();
1026 #pragma omp parallel
1027 {
1028 if (m_faceOffset[0] > -1) {
1029 #pragma omp for nowait
1030 for (index_t k2 = 0; k2 < NE2; ++k2) {
1031 for (index_t k1 = 0; k1 < NE1; ++k1) {
1032 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1033 // set vector at four quadrature points
1034 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1035 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1036 *o++ = -1.; *o++ = 0.; *o++ = 0.;
1037 *o++ = -1.; *o++ = 0.; *o = 0.;
1038 }
1039 }
1040 }
1041
1042 if (m_faceOffset[1] > -1) {
1043 #pragma omp for nowait
1044 for (index_t k2 = 0; k2 < NE2; ++k2) {
1045 for (index_t k1 = 0; k1 < NE1; ++k1) {
1046 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1047 // set vector at four quadrature points
1048 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1049 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1050 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1051 *o++ = 1.; *o++ = 0.; *o = 0.;
1052 }
1053 }
1054 }
1055
1056 if (m_faceOffset[2] > -1) {
1057 #pragma omp for nowait
1058 for (index_t k2 = 0; k2 < NE2; ++k2) {
1059 for (index_t k0 = 0; k0 < NE0; ++k0) {
1060 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1061 // set vector at four quadrature points
1062 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1063 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1064 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1065 *o++ = 0.; *o++ = -1.; *o = 0.;
1066 }
1067 }
1068 }
1069
1070 if (m_faceOffset[3] > -1) {
1071 #pragma omp for nowait
1072 for (index_t k2 = 0; k2 < NE2; ++k2) {
1073 for (index_t k0 = 0; k0 < NE0; ++k0) {
1074 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1075 // set vector at four quadrature points
1076 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1077 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1078 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1079 *o++ = 0.; *o++ = 1.; *o = 0.;
1080 }
1081 }
1082 }
1083
1084 if (m_faceOffset[4] > -1) {
1085 #pragma omp for nowait
1086 for (index_t k1 = 0; k1 < NE1; ++k1) {
1087 for (index_t k0 = 0; k0 < NE0; ++k0) {
1088 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1089 // set vector at four quadrature points
1090 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1091 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1092 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1093 *o++ = 0.; *o++ = 0.; *o = -1.;
1094 }
1095 }
1096 }
1097
1098 if (m_faceOffset[5] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k1 = 0; k1 < NE1; ++k1) {
1101 for (index_t k0 = 0; k0 < NE0; ++k0) {
1102 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1103 // set vector at four quadrature points
1104 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1105 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1106 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1107 *o++ = 0.; *o++ = 0.; *o = 1.;
1108 }
1109 }
1110 }
1111 } // end of parallel section
1112 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1113 out.requireWrite();
1114 #pragma omp parallel
1115 {
1116 if (m_faceOffset[0] > -1) {
1117 #pragma omp for nowait
1118 for (index_t k2 = 0; k2 < NE2; ++k2) {
1119 for (index_t k1 = 0; k1 < NE1; ++k1) {
1120 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1121 *o++ = -1.;
1122 *o++ = 0.;
1123 *o = 0.;
1124 }
1125 }
1126 }
1127
1128 if (m_faceOffset[1] > -1) {
1129 #pragma omp for nowait
1130 for (index_t k2 = 0; k2 < NE2; ++k2) {
1131 for (index_t k1 = 0; k1 < NE1; ++k1) {
1132 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1133 *o++ = 1.;
1134 *o++ = 0.;
1135 *o = 0.;
1136 }
1137 }
1138 }
1139
1140 if (m_faceOffset[2] > -1) {
1141 #pragma omp for nowait
1142 for (index_t k2 = 0; k2 < NE2; ++k2) {
1143 for (index_t k0 = 0; k0 < NE0; ++k0) {
1144 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1145 *o++ = 0.;
1146 *o++ = -1.;
1147 *o = 0.;
1148 }
1149 }
1150 }
1151
1152 if (m_faceOffset[3] > -1) {
1153 #pragma omp for nowait
1154 for (index_t k2 = 0; k2 < NE2; ++k2) {
1155 for (index_t k0 = 0; k0 < NE0; ++k0) {
1156 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1157 *o++ = 0.;
1158 *o++ = 1.;
1159 *o = 0.;
1160 }
1161 }
1162 }
1163
1164 if (m_faceOffset[4] > -1) {
1165 #pragma omp for nowait
1166 for (index_t k1 = 0; k1 < NE1; ++k1) {
1167 for (index_t k0 = 0; k0 < NE0; ++k0) {
1168 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1169 *o++ = 0.;
1170 *o++ = 0.;
1171 *o = -1.;
1172 }
1173 }
1174 }
1175
1176 if (m_faceOffset[5] > -1) {
1177 #pragma omp for nowait
1178 for (index_t k1 = 0; k1 < NE1; ++k1) {
1179 for (index_t k0 = 0; k0 < NE0; ++k0) {
1180 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1181 *o++ = 0.;
1182 *o++ = 0.;
1183 *o = 1.;
1184 }
1185 }
1186 }
1187 } // end of parallel section
1188
1189 } else {
1190 stringstream msg;
1191 msg << "setToNormal: invalid function space type "
1192 << out.getFunctionSpace().getTypeCode();
1193 throw RipleyException(msg.str());
1194 }
1195 }
1196
1197 void Brick::setToSize(escript::Data& out) const
1198 {
1199 if (out.getFunctionSpace().getTypeCode() == Elements
1200 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1201 out.requireWrite();
1202 const dim_t numQuad=out.getNumDataPointsPerSample();
1203 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1204 const dim_t NE = getNumElements();
1205 #pragma omp parallel for
1206 for (index_t k = 0; k < NE; ++k) {
1207 double* o = out.getSampleDataRW(k);
1208 fill(o, o+numQuad, size);
1209 }
1210 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1211 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1212 out.requireWrite();
1213 const dim_t numQuad=out.getNumDataPointsPerSample();
1214 const dim_t NE0 = m_NE[0];
1215 const dim_t NE1 = m_NE[1];
1216 const dim_t NE2 = m_NE[2];
1217 #pragma omp parallel
1218 {
1219 if (m_faceOffset[0] > -1) {
1220 const double size=min(m_dx[1],m_dx[2]);
1221 #pragma omp for nowait
1222 for (index_t k2 = 0; k2 < NE2; ++k2) {
1223 for (index_t k1 = 0; k1 < NE1; ++k1) {
1224 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1225 fill(o, o+numQuad, size);
1226 }
1227 }
1228 }
1229
1230 if (m_faceOffset[1] > -1) {
1231 const double size=min(m_dx[1],m_dx[2]);
1232 #pragma omp for nowait
1233 for (index_t k2 = 0; k2 < NE2; ++k2) {
1234 for (index_t k1 = 0; k1 < NE1; ++k1) {
1235 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1236 fill(o, o+numQuad, size);
1237 }
1238 }
1239 }
1240
1241 if (m_faceOffset[2] > -1) {
1242 const double size=min(m_dx[0],m_dx[2]);
1243 #pragma omp for nowait
1244 for (index_t k2 = 0; k2 < NE2; ++k2) {
1245 for (index_t k0 = 0; k0 < NE0; ++k0) {
1246 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1247 fill(o, o+numQuad, size);
1248 }
1249 }
1250 }
1251
1252 if (m_faceOffset[3] > -1) {
1253 const double size=min(m_dx[0],m_dx[2]);
1254 #pragma omp for nowait
1255 for (index_t k2 = 0; k2 < NE2; ++k2) {
1256 for (index_t k0 = 0; k0 < NE0; ++k0) {
1257 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1258 fill(o, o+numQuad, size);
1259 }
1260 }
1261 }
1262
1263 if (m_faceOffset[4] > -1) {
1264 const double size=min(m_dx[0],m_dx[1]);
1265 #pragma omp for nowait
1266 for (index_t k1 = 0; k1 < NE1; ++k1) {
1267 for (index_t k0 = 0; k0 < NE0; ++k0) {
1268 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1269 fill(o, o+numQuad, size);
1270 }
1271 }
1272 }
1273
1274 if (m_faceOffset[5] > -1) {
1275 const double size=min(m_dx[0],m_dx[1]);
1276 #pragma omp for nowait
1277 for (index_t k1 = 0; k1 < NE1; ++k1) {
1278 for (index_t k0 = 0; k0 < NE0; ++k0) {
1279 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1280 fill(o, o+numQuad, size);
1281 }
1282 }
1283 }
1284 } // end of parallel section
1285
1286 } else {
1287 stringstream msg;
1288 msg << "setToSize: invalid function space type "
1289 << out.getFunctionSpace().getTypeCode();
1290 throw RipleyException(msg.str());
1291 }
1292 }
1293
1294 void Brick::Print_Mesh_Info(const bool full) const
1295 {
1296 RipleyDomain::Print_Mesh_Info(full);
1297 if (full) {
1298 cout << " Id Coordinates" << endl;
1299 cout.precision(15);
1300 cout.setf(ios::scientific, ios::floatfield);
1301 for (index_t i=0; i < getNumNodes(); i++) {
1302 cout << " " << setw(5) << m_nodeId[i]
1303 << " " << getLocalCoordinate(i%m_NN[0], 0)
1304 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1305 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1306 }
1307 }
1308 }
1309
1310
1311 //protected
1312 void Brick::assembleCoordinates(escript::Data& arg) const
1313 {
1314 escriptDataC x = arg.getDataC();
1315 int numDim = m_numDim;
1316 if (!isDataPointShapeEqual(&x, 1, &numDim))
1317 throw RipleyException("setToX: Invalid Data object shape");
1318 if (!numSamplesEqual(&x, 1, getNumNodes()))
1319 throw RipleyException("setToX: Illegal number of samples in Data object");
1320
1321 const dim_t NN0 = m_NN[0];
1322 const dim_t NN1 = m_NN[1];
1323 const dim_t NN2 = m_NN[2];
1324 arg.requireWrite();
1325 #pragma omp parallel for
1326 for (dim_t i2 = 0; i2 < NN2; i2++) {
1327 for (dim_t i1 = 0; i1 < NN1; i1++) {
1328 for (dim_t i0 = 0; i0 < NN0; i0++) {
1329 double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1330 point[0] = getLocalCoordinate(i0, 0);
1331 point[1] = getLocalCoordinate(i1, 1);
1332 point[2] = getLocalCoordinate(i2, 2);
1333 }
1334 }
1335 }
1336 }
1337
1338 //protected
1339 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1340 {
1341 const dim_t numComp = in.getDataPointSize();
1342 const double C0 = .044658198738520451079;
1343 const double C1 = .16666666666666666667;
1344 const double C2 = .21132486540518711775;
1345 const double C3 = .25;
1346 const double C4 = .5;
1347 const double C5 = .62200846792814621559;
1348 const double C6 = .78867513459481288225;
1349 const dim_t NE0 = m_NE[0];
1350 const dim_t NE1 = m_NE[1];
1351 const dim_t NE2 = m_NE[2];
1352
1353 if (out.getFunctionSpace().getTypeCode() == Elements) {
1354 out.requireWrite();
1355 #pragma omp parallel
1356 {
1357 vector<double> f_000(numComp);
1358 vector<double> f_001(numComp);
1359 vector<double> f_010(numComp);
1360 vector<double> f_011(numComp);
1361 vector<double> f_100(numComp);
1362 vector<double> f_101(numComp);
1363 vector<double> f_110(numComp);
1364 vector<double> f_111(numComp);
1365 #pragma omp for
1366 for (index_t k2=0; k2 < NE2; ++k2) {
1367 for (index_t k1=0; k1 < NE1; ++k1) {
1368 for (index_t k0=0; k0 < NE0; ++k0) {
1369 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1370 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1378 for (index_t i=0; i < numComp; ++i) {
1379 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1380 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1381 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1382 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1383 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1384 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1385 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1386 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1387 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1388 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1389 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1390 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1391 o[INDEX3(i,0,0,numComp,3)] = V0;
1392 o[INDEX3(i,1,0,numComp,3)] = V4;
1393 o[INDEX3(i,2,0,numComp,3)] = V8;
1394 o[INDEX3(i,0,1,numComp,3)] = V0;
1395 o[INDEX3(i,1,1,numComp,3)] = V5;
1396 o[INDEX3(i,2,1,numComp,3)] = V9;
1397 o[INDEX3(i,0,2,numComp,3)] = V1;
1398 o[INDEX3(i,1,2,numComp,3)] = V4;
1399 o[INDEX3(i,2,2,numComp,3)] = V10;
1400 o[INDEX3(i,0,3,numComp,3)] = V1;
1401 o[INDEX3(i,1,3,numComp,3)] = V5;
1402 o[INDEX3(i,2,3,numComp,3)] = V11;
1403 o[INDEX3(i,0,4,numComp,3)] = V2;
1404 o[INDEX3(i,1,4,numComp,3)] = V6;
1405 o[INDEX3(i,2,4,numComp,3)] = V8;
1406 o[INDEX3(i,0,5,numComp,3)] = V2;
1407 o[INDEX3(i,1,5,numComp,3)] = V7;
1408 o[INDEX3(i,2,5,numComp,3)] = V9;
1409 o[INDEX3(i,0,6,numComp,3)] = V3;
1410 o[INDEX3(i,1,6,numComp,3)] = V6;
1411 o[INDEX3(i,2,6,numComp,3)] = V10;
1412 o[INDEX3(i,0,7,numComp,3)] = V3;
1413 o[INDEX3(i,1,7,numComp,3)] = V7;
1414 o[INDEX3(i,2,7,numComp,3)] = V11;
1415 } // end of component loop i
1416 } // end of k0 loop
1417 } // end of k1 loop
1418 } // end of k2 loop
1419 } // end of parallel section
1420 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1421 out.requireWrite();
1422 #pragma omp parallel
1423 {
1424 vector<double> f_000(numComp);
1425 vector<double> f_001(numComp);
1426 vector<double> f_010(numComp);
1427 vector<double> f_011(numComp);
1428 vector<double> f_100(numComp);
1429 vector<double> f_101(numComp);
1430 vector<double> f_110(numComp);
1431 vector<double> f_111(numComp);
1432 #pragma omp for
1433 for (index_t k2=0; k2 < NE2; ++k2) {
1434 for (index_t k1=0; k1 < NE1; ++k1) {
1435 for (index_t k0=0; k0 < NE0; ++k0) {
1436 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1438 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1445 for (index_t i=0; i < numComp; ++i) {
1446 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1447 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1448 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1449 } // end of component loop i
1450 } // end of k0 loop
1451 } // end of k1 loop
1452 } // end of k2 loop
1453 } // end of parallel section
1454 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1455 out.requireWrite();
1456 #pragma omp parallel
1457 {
1458 vector<double> f_000(numComp);
1459 vector<double> f_001(numComp);
1460 vector<double> f_010(numComp);
1461 vector<double> f_011(numComp);
1462 vector<double> f_100(numComp);
1463 vector<double> f_101(numComp);
1464 vector<double> f_110(numComp);
1465 vector<double> f_111(numComp);
1466 if (m_faceOffset[0] > -1) {
1467 #pragma omp for nowait
1468 for (index_t k2=0; k2 < NE2; ++k2) {
1469 for (index_t k1=0; k1 < NE1; ++k1) {
1470 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1479 for (index_t i=0; i < numComp; ++i) {
1480 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1481 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1482 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1483 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1484 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1485 o[INDEX3(i,1,0,numComp,3)] = V0;
1486 o[INDEX3(i,2,0,numComp,3)] = V2;
1487 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1488 o[INDEX3(i,1,1,numComp,3)] = V0;
1489 o[INDEX3(i,2,1,numComp,3)] = V3;
1490 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1491 o[INDEX3(i,1,2,numComp,3)] = V1;
1492 o[INDEX3(i,2,2,numComp,3)] = V2;
1493 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1494 o[INDEX3(i,1,3,numComp,3)] = V1;
1495 o[INDEX3(i,2,3,numComp,3)] = V3;
1496 } // end of component loop i
1497 } // end of k1 loop
1498 } // end of k2 loop
1499 } // end of face 0
1500 if (m_faceOffset[1] > -1) {
1501 #pragma omp for nowait
1502 for (index_t k2=0; k2 < NE2; ++k2) {
1503 for (index_t k1=0; k1 < NE1; ++k1) {
1504 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1513 for (index_t i=0; i < numComp; ++i) {
1514 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1515 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1516 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1517 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1518 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1519 o[INDEX3(i,1,0,numComp,3)] = V0;
1520 o[INDEX3(i,2,0,numComp,3)] = V2;
1521 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1522 o[INDEX3(i,1,1,numComp,3)] = V0;
1523 o[INDEX3(i,2,1,numComp,3)] = V3;
1524 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1525 o[INDEX3(i,1,2,numComp,3)] = V1;
1526 o[INDEX3(i,2,2,numComp,3)] = V2;
1527 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1528 o[INDEX3(i,1,3,numComp,3)] = V1;
1529 o[INDEX3(i,2,3,numComp,3)] = V3;
1530 } // end of component loop i
1531 } // end of k1 loop
1532 } // end of k2 loop
1533 } // end of face 1
1534 if (m_faceOffset[2] > -1) {
1535 #pragma omp for nowait
1536 for (index_t k2=0; k2 < NE2; ++k2) {
1537 for (index_t k0=0; k0 < NE0; ++k0) {
1538 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1547 for (index_t i=0; i < numComp; ++i) {
1548 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1549 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1550 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1551 o[INDEX3(i,0,0,numComp,3)] = V0;
1552 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1553 o[INDEX3(i,2,0,numComp,3)] = V1;
1554 o[INDEX3(i,0,1,numComp,3)] = V0;
1555 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1556 o[INDEX3(i,2,1,numComp,3)] = V2;
1557 o[INDEX3(i,0,2,numComp,3)] = V0;
1558 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1559 o[INDEX3(i,2,2,numComp,3)] = V1;
1560 o[INDEX3(i,0,3,numComp,3)] = V0;
1561 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1562 o[INDEX3(i,2,3,numComp,3)] = V2;
1563 } // end of component loop i
1564 } // end of k0 loop
1565 } // end of k2 loop
1566 } // end of face 2
1567 if (m_faceOffset[3] > -1) {
1568 #pragma omp for nowait
1569 for (index_t k2=0; k2 < NE2; ++k2) {
1570 for (index_t k0=0; k0 < NE0; ++k0) {
1571 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1580 for (index_t i=0; i < numComp; ++i) {
1581 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1582 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1583 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1584 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1585 o[INDEX3(i,0,0,numComp,3)] = V0;
1586 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1587 o[INDEX3(i,2,0,numComp,3)] = V2;
1588 o[INDEX3(i,0,1,numComp,3)] = V0;
1589 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1590 o[INDEX3(i,2,1,numComp,3)] = V3;
1591 o[INDEX3(i,0,2,numComp,3)] = V1;
1592 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1593 o[INDEX3(i,2,2,numComp,3)] = V2;
1594 o[INDEX3(i,0,3,numComp,3)] = V1;
1595 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1596 o[INDEX3(i,2,3,numComp,3)] = V3;
1597 } // end of component loop i
1598 } // end of k0 loop
1599 } // end of k2 loop
1600 } // end of face 3
1601 if (m_faceOffset[4] > -1) {
1602 #pragma omp for nowait
1603 for (index_t k1=0; k1 < NE1; ++k1) {
1604 for (index_t k0=0; k0 < NE0; ++k0) {
1605 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1606 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1607 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1608 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1609 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1610 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1614 for (index_t i=0; i < numComp; ++i) {
1615 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1616 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1617 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1618 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1619 o[INDEX3(i,0,0,numComp,3)] = V0;
1620 o[INDEX3(i,1,0,numComp,3)] = V2;
1621 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1622 o[INDEX3(i,0,1,numComp,3)] = V0;
1623 o[INDEX3(i,1,1,numComp,3)] = V3;
1624 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1625 o[INDEX3(i,0,2,numComp,3)] = V1;
1626 o[INDEX3(i,1,2,numComp,3)] = V2;
1627 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1628 o[INDEX3(i,0,3,numComp,3)] = V1;
1629 o[INDEX3(i,1,3,numComp,3)] = V3;
1630 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1631 } // end of component loop i
1632 } // end of k0 loop
1633 } // end of k1 loop
1634 } // end of face 4
1635 if (m_faceOffset[5] > -1) {
1636 #pragma omp for nowait
1637 for (index_t k1=0; k1 < NE1; ++k1) {
1638 for (index_t k0=0; k0 < NE0; ++k0) {
1639 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1640 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1641 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1642 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1643 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1644 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1645 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1646 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1647 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0));
1648 for (index_t i=0; i < numComp; ++i) {
1649 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1650 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1651 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1652 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1653 o[INDEX3(i,0,0,numComp,3)] = V0;
1654 o[INDEX3(i,1,0,numComp,3)] = V2;
1655 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1656 o[INDEX3(i,0,1,numComp,3)] = V0;
1657 o[INDEX3(i,1,1,numComp,3)] = V3;
1658 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1659 o[INDEX3(i,0,2,numComp,3)] = V1;
1660 o[INDEX3(i,1,2,numComp,3)] = V2;
1661 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1662 o[INDEX3(i,0,3,numComp,3)] = V1;
1663 o[INDEX3(i,1,3,numComp,3)] = V3;
1664 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1665 } // end of component loop i
1666 } // end of k0 loop
1667 } // end of k1 loop
1668 } // end of face 5
1669 } // end of parallel section
1670 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1671 out.requireWrite();
1672 #pragma omp parallel
1673 {
1674 vector<double> f_000(numComp);
1675 vector<double> f_001(numComp);
1676 vector<double> f_010(numComp);
1677 vector<double> f_011(numComp);
1678 vector<double> f_100(numComp);
1679 vector<double> f_101(numComp);
1680 vector<double> f_110(numComp);
1681 vector<double> f_111(numComp);
1682 if (m_faceOffset[0] > -1) {
1683 #pragma omp for nowait
1684 for (index_t k2=0; k2 < NE2; ++k2) {
1685 for (index_t k1=0; k1 < NE1; ++k1) {
1686 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1687 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1688 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1689 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1690 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1691 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1692 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1693 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1694 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1695 for (index_t i=0; i < numComp; ++i) {
1696 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1697 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1698 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1699 } // end of component loop i
1700 } // end of k1 loop
1701 } // end of k2 loop
1702 } // end of face 0
1703 if (m_faceOffset[1] > -1) {
1704 #pragma omp for nowait
1705 for (index_t k2=0; k2 < NE2; ++k2) {
1706 for (index_t k1=0; k1 < NE1; ++k1) {
1707 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1708 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1709 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1710 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1711 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1712 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1713 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1714 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1715 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1716 for (index_t i=0; i < numComp; ++i) {
1717 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1718 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1719 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1720 } // end of component loop i
1721 } // end of k1 loop
1722 } // end of k2 loop
1723 } // end of face 1
1724 if (m_faceOffset[2] > -1) {
1725 #pragma omp for nowait
1726 for (index_t k2=0; k2 < NE2; ++k2) {
1727 for (index_t k0=0; k0 < NE0; ++k0) {
1728 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1729 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1730 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1731 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1732 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1733 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1734 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1735 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1736 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1737 for (index_t i=0; i < numComp; ++i) {
1738 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1739 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1740 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1741 } // end of component loop i
1742 } // end of k0 loop
1743 } // end of k2 loop
1744 } // end of face 2
1745 if (m_faceOffset[3] > -1) {
1746 #pragma omp for nowait
1747 for (index_t k2=0; k2 < NE2; ++k2) {
1748 for (index_t k0=0; k0 < NE0; ++k0) {
1749 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1750 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1751 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1752 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1753 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1754 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1755 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1756 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1757 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1758 for (index_t i=0; i < numComp; ++i) {
1759 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1760 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1761 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1762 } // end of component loop i
1763 } // end of k0 loop
1764 } // end of k2 loop
1765 } // end of face 3
1766 if (m_faceOffset[4] > -1) {
1767 #pragma omp for nowait
1768 for (index_t k1=0; k1 < NE1; ++k1) {
1769 for (index_t k0=0; k0 < NE0; ++k0) {
1770 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1771 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1772 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1773 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1774 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1775 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1776 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1777 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1778 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1779 for (index_t i=0; i < numComp; ++i) {
1780 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1781 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1782 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1783 } // end of component loop i
1784 } // end of k0 loop
1785 } // end of k1 loop
1786 } // end of face 4
1787 if (m_faceOffset[5] > -1) {
1788 #pragma omp for nowait
1789 for (index_t k1=0; k1 < NE1; ++k1) {
1790 for (index_t k0=0; k0 < NE0; ++k0) {
1791 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1792 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1793 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1794 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1795 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1796 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1797 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1798 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1799 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0));
1800 for (index_t i=0; i < numComp; ++i) {
1801 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1802 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1803 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1804 } // end of component loop i
1805 } // end of k0 loop
1806 } // end of k1 loop
1807 } // end of face 5
1808 } // end of parallel section
1809 }
1810 }
1811
1812 //protected
1813 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1814 {
1815 const dim_t numComp = arg.getDataPointSize();
1816 const index_t left = (m_offset[0]==0 ? 0 : 1);
1817 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1818 const index_t front = (m_offset[2]==0 ? 0 : 1);
1819 const int fs = arg.getFunctionSpace().getTypeCode();
1820 if (fs == Elements && arg.actsExpanded()) {
1821 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1822 #pragma omp parallel
1823 {
1824 vector<double> int_local(numComp, 0);
1825 #pragma omp for nowait
1826 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1827 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1828 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1829 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1830 for (index_t i=0; i < numComp; ++i) {
1831 const double f_0 = f[INDEX2(i,0,numComp)];
1832 const double f_1 = f[INDEX2(i,1,numComp)];
1833 const double f_2 = f[INDEX2(i,2,numComp)];
1834 const double f_3 = f[INDEX2(i,3,numComp)];
1835 const double f_4 = f[INDEX2(i,4,numComp)];
1836 const double f_5 = f[INDEX2(i,5,numComp)];
1837 const double f_6 = f[INDEX2(i,6,numComp)];
1838 const double f_7 = f[INDEX2(i,7,numComp)];
1839 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1840 } // end of component loop i
1841 } // end of k0 loop
1842 } // end of k1 loop
1843 } // end of k2 loop
1844
1845 #pragma omp critical
1846