/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4940 - (show annotations)
Thu May 15 01:40:06 2014 UTC (4 years, 10 months ago) by caltinay
File size: 166324 byte(s)
A branch to have fun with diagonal storage for ripley.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <limits>
18
19 #include <ripley/Brick.h>
20 #include <paso/SystemMatrix.h>
21 #include <esysUtils/esysFileWriter.h>
22 #include <ripley/DefaultAssembler3D.h>
23 #include <ripley/WaveAssembler3D.h>
24 #include <ripley/LameAssembler3D.h>
25 #include <ripley/domainhelpers.h>
26 #include <boost/scoped_array.hpp>
27
28 #ifdef USE_NETCDF
29 #include <netcdfcpp.h>
30 #endif
31
32 #if USE_SILO
33 #include <silo.h>
34 #ifdef ESYS_MPI
35 #include <pmpio.h>
36 #endif
37 #endif
38
39 #include <iomanip>
40
41 #include "esysUtils/EsysRandom.h"
42 #include "blocktools.h"
43
44
45 using namespace std;
46 using esysUtils::FileWriter;
47
48 namespace ripley {
49
50 int indexOfMax(int a, int b, int c) {
51 if (a > b) {
52 if (c > a) {
53 return 2;
54 }
55 return 0;
56 } else if (b > c) {
57 return 1;
58 }
59 return 2;
60 }
61
62 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
63 double x1, double y1, double z1, int d0, int d1, int d2,
64 const std::vector<double>& points, const std::vector<int>& tags,
65 const simap_t& tagnamestonums,
66 escript::SubWorld_ptr w) :
67 RipleyDomain(3, w)
68 {
69 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
70 * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
71 throw RipleyException("The number of elements has overflowed, this "
72 "limit may be raised in future releases.");
73
74 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
75 throw RipleyException("Number of elements in each spatial dimension "
76 "must be positive");
77
78 // ignore subdivision parameters for serial run
79 if (m_mpiInfo->size == 1) {
80 d0=1;
81 d1=1;
82 d2=1;
83 }
84 bool warn=false;
85
86 std::vector<int> factors;
87 int ranks = m_mpiInfo->size;
88 int epr[3] = {n0,n1,n2};
89 int d[3] = {d0,d1,d2};
90 if (d0<=0 || d1<=0 || d2<=0) {
91 for (int i = 0; i < 3; i++) {
92 if (d[i] < 1) {
93 d[i] = 1;
94 continue;
95 }
96 epr[i] = -1; // can no longer be max
97 if (ranks % d[i] != 0) {
98 throw RipleyException("Invalid number of spatial subdivisions");
99 }
100 //remove
101 ranks /= d[i];
102 }
103 factorise(factors, ranks);
104 if (factors.size() != 0) {
105 warn = true;
106 }
107 }
108 while (factors.size() > 0) {
109 int i = indexOfMax(epr[0],epr[1],epr[2]);
110 int f = factors.back();
111 factors.pop_back();
112 d[i] *= f;
113 epr[i] /= f;
114 }
115 d0 = d[0]; d1 = d[1]; d2 = d[2];
116
117 // ensure number of subdivisions is valid and nodes can be distributed
118 // among number of ranks
119 if (d0*d1*d2 != m_mpiInfo->size){
120 throw RipleyException("Invalid number of spatial subdivisions");
121 }
122 if (warn) {
123 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
124 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
125 }
126
127 double l0 = x1-x0;
128 double l1 = y1-y0;
129 double l2 = z1-z0;
130 m_dx[0] = l0/n0;
131 m_dx[1] = l1/n1;
132 m_dx[2] = l2/n2;
133
134 if ((n0+1)%d0 > 0) {
135 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
136 l0=m_dx[0]*n0;
137 cout << "Warning: Adjusted number of elements and length. N0="
138 << n0 << ", l0=" << l0 << endl;
139 }
140 if ((n1+1)%d1 > 0) {
141 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
142 l1=m_dx[1]*n1;
143 cout << "Warning: Adjusted number of elements and length. N1="
144 << n1 << ", l1=" << l1 << endl;
145 }
146 if ((n2+1)%d2 > 0) {
147 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
148 l2=m_dx[2]*n2;
149 cout << "Warning: Adjusted number of elements and length. N2="
150 << n2 << ", l2=" << l2 << endl;
151 }
152
153 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
154 throw RipleyException("Too few elements for the number of ranks");
155
156 m_gNE[0] = n0;
157 m_gNE[1] = n1;
158 m_gNE[2] = n2;
159 m_origin[0] = x0;
160 m_origin[1] = y0;
161 m_origin[2] = z0;
162 m_length[0] = l0;
163 m_length[1] = l1;
164 m_length[2] = l2;
165 m_NX[0] = d0;
166 m_NX[1] = d1;
167 m_NX[2] = d2;
168
169 // local number of elements (including overlap)
170 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
171 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
172 m_NE[0]++;
173 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
174 m_ownNE[0]--;
175
176 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
177 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
178 m_NE[1]++;
179 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
180 m_ownNE[1]--;
181
182 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
183 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
184 m_NE[2]++;
185 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
186 m_ownNE[2]--;
187
188 // local number of nodes
189 m_NN[0] = m_NE[0]+1;
190 m_NN[1] = m_NE[1]+1;
191 m_NN[2] = m_NE[2]+1;
192
193 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
194 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
195 if (m_offset[0] > 0)
196 m_offset[0]--;
197 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
198 if (m_offset[1] > 0)
199 m_offset[1]--;
200 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
201 if (m_offset[2] > 0)
202 m_offset[2]--;
203
204 populateSampleIds();
205 createPattern();
206
207 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
208 for (map<string, int>::const_iterator i = tagnamestonums.begin();
209 i != tagnamestonums.end(); i++) {
210 setTagMap(i->first, i->second);
211 }
212 addPoints(tags.size(), &points[0], &tags[0]);
213 }
214
215
216 Brick::~Brick()
217 {
218 delete assembler;
219 }
220
221 string Brick::getDescription() const
222 {
223 return "ripley::Brick";
224 }
225
226 bool Brick::operator==(const AbstractDomain& other) const
227 {
228 const Brick* o=dynamic_cast<const Brick*>(&other);
229 if (o) {
230 return (RipleyDomain::operator==(other) &&
231 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
232 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
233 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
234 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
235 }
236
237 return false;
238 }
239
240 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
241 const ReaderParameters& params) const
242 {
243 #ifdef USE_NETCDF
244 // check destination function space
245 int myN0, myN1, myN2;
246 if (out.getFunctionSpace().getTypeCode() == Nodes) {
247 myN0 = m_NN[0];
248 myN1 = m_NN[1];
249 myN2 = m_NN[2];
250 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
251 out.getFunctionSpace().getTypeCode() == ReducedElements) {
252 myN0 = m_NE[0];
253 myN1 = m_NE[1];
254 myN2 = m_NE[2];
255 } else
256 throw RipleyException("readNcGrid(): invalid function space for output data object");
257
258 if (params.first.size() != 3)
259 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
260
261 if (params.numValues.size() != 3)
262 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
263
264 if (params.multiplier.size() != 3)
265 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
266 for (size_t i=0; i<params.multiplier.size(); i++)
267 if (params.multiplier[i]<1)
268 throw RipleyException("readNcGrid(): all multipliers must be positive");
269
270 // check file existence and size
271 NcFile f(filename.c_str(), NcFile::ReadOnly);
272 if (!f.is_valid())
273 throw RipleyException("readNcGrid(): cannot open file");
274
275 NcVar* var = f.get_var(varname.c_str());
276 if (!var)
277 throw RipleyException("readNcGrid(): invalid variable name");
278
279 // TODO: rank>0 data support
280 const int numComp = out.getDataPointSize();
281 if (numComp > 1)
282 throw RipleyException("readNcGrid(): only scalar data supported");
283
284 const int dims = var->num_dims();
285 boost::scoped_array<long> edges(var->edges());
286
287 // is this a slice of the data object (dims!=3)?
288 // note the expected ordering of edges (as in numpy: z,y,x)
289 if ( (dims==3 && (params.numValues[2] > edges[0] ||
290 params.numValues[1] > edges[1] ||
291 params.numValues[0] > edges[2]))
292 || (dims==2 && params.numValues[2]>1)
293 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
294 throw RipleyException("readNcGrid(): not enough data in file");
295 }
296
297 // check if this rank contributes anything
298 if (params.first[0] >= m_offset[0]+myN0 ||
299 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
300 params.first[1] >= m_offset[1]+myN1 ||
301 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
302 params.first[2] >= m_offset[2]+myN2 ||
303 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
304 return;
305 }
306
307 // now determine how much this rank has to write
308
309 // first coordinates in data object to write to
310 const int first0 = max(0, params.first[0]-m_offset[0]);
311 const int first1 = max(0, params.first[1]-m_offset[1]);
312 const int first2 = max(0, params.first[2]-m_offset[2]);
313 // indices to first value in file (not accounting for reverse yet)
314 int idx0 = max(0, m_offset[0]-params.first[0]);
315 int idx1 = max(0, m_offset[1]-params.first[1]);
316 int idx2 = max(0, m_offset[2]-params.first[2]);
317 // number of values to read
318 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
319 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
320 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
321
322 // make sure we read the right block if going backwards through file
323 if (params.reverse[0])
324 idx0 = edges[dims-1]-num0-idx0;
325 if (dims>1 && params.reverse[1])
326 idx1 = edges[dims-2]-num1-idx1;
327 if (dims>2 && params.reverse[2])
328 idx2 = edges[dims-3]-num2-idx2;
329
330
331 vector<double> values(num0*num1*num2);
332 if (dims==3) {
333 var->set_cur(idx2, idx1, idx0);
334 var->get(&values[0], num2, num1, num0);
335 } else if (dims==2) {
336 var->set_cur(idx1, idx0);
337 var->get(&values[0], num1, num0);
338 } else {
339 var->set_cur(idx0);
340 var->get(&values[0], num0);
341 }
342
343 const int dpp = out.getNumDataPointsPerSample();
344 out.requireWrite();
345
346 // helpers for reversing
347 const int x0 = (params.reverse[0] ? num0-1 : 0);
348 const int x_mult = (params.reverse[0] ? -1 : 1);
349 const int y0 = (params.reverse[1] ? num1-1 : 0);
350 const int y_mult = (params.reverse[1] ? -1 : 1);
351 const int z0 = (params.reverse[2] ? num2-1 : 0);
352 const int z_mult = (params.reverse[2] ? -1 : 1);
353
354 for (index_t z=0; z<num2; z++) {
355 for (index_t y=0; y<num1; y++) {
356 #pragma omp parallel for
357 for (index_t x=0; x<num0; x++) {
358 const int baseIndex = first0+x*params.multiplier[0]
359 +(first1+y*params.multiplier[1])*myN0
360 +(first2+z*params.multiplier[2])*myN0*myN1;
361 const int srcIndex=(z0+z_mult*z)*num1*num0
362 +(y0+y_mult*y)*num0
363 +(x0+x_mult*x);
364 if (!isnan(values[srcIndex])) {
365 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
366 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
367 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
368 const int dataIndex = baseIndex+m0
369 +m1*myN0
370 +m2*myN0*myN1;
371 double* dest = out.getSampleDataRW(dataIndex);
372 for (index_t q=0; q<dpp; q++) {
373 *dest++ = values[srcIndex];
374 }
375 }
376 }
377 }
378 }
379 }
380 }
381 }
382 #else
383 throw RipleyException("readNcGrid(): not compiled with netCDF support");
384 #endif
385 }
386
387 #ifdef USE_BOOSTIO
388 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
389 const ReaderParameters& params) const
390 {
391 // the mapping is not universally correct but should work on our
392 // supported platforms
393 switch (params.dataType) {
394 case DATATYPE_INT32:
395 readBinaryGridZippedImpl<int>(out, filename, params);
396 break;
397 case DATATYPE_FLOAT32:
398 readBinaryGridZippedImpl<float>(out, filename, params);
399 break;
400 case DATATYPE_FLOAT64:
401 readBinaryGridZippedImpl<double>(out, filename, params);
402 break;
403 default:
404 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
405 }
406 }
407 #endif
408
409 void Brick::readBinaryGrid(escript::Data& out, string filename,
410 const ReaderParameters& params) const
411 {
412 // the mapping is not universally correct but should work on our
413 // supported platforms
414 switch (params.dataType) {
415 case DATATYPE_INT32:
416 readBinaryGridImpl<int>(out, filename, params);
417 break;
418 case DATATYPE_FLOAT32:
419 readBinaryGridImpl<float>(out, filename, params);
420 break;
421 case DATATYPE_FLOAT64:
422 readBinaryGridImpl<double>(out, filename, params);
423 break;
424 default:
425 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
426 }
427 }
428
429 template<typename ValueType>
430 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
431 const ReaderParameters& params) const
432 {
433 // check destination function space
434 int myN0, myN1, myN2;
435 if (out.getFunctionSpace().getTypeCode() == Nodes) {
436 myN0 = m_NN[0];
437 myN1 = m_NN[1];
438 myN2 = m_NN[2];
439 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
440 out.getFunctionSpace().getTypeCode() == ReducedElements) {
441 myN0 = m_NE[0];
442 myN1 = m_NE[1];
443 myN2 = m_NE[2];
444 } else
445 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
446
447 if (params.first.size() != 3)
448 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
449
450 if (params.numValues.size() != 3)
451 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
452
453 if (params.multiplier.size() != 3)
454 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
455 for (size_t i=0; i<params.multiplier.size(); i++)
456 if (params.multiplier[i]<1)
457 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
458
459 // check file existence and size
460 ifstream f(filename.c_str(), ifstream::binary);
461 if (f.fail()) {
462 throw RipleyException("readBinaryGrid(): cannot open file");
463 }
464 f.seekg(0, ios::end);
465 const int numComp = out.getDataPointSize();
466 const int filesize = f.tellg();
467 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
468 if (filesize < reqsize) {
469 f.close();
470 throw RipleyException("readBinaryGrid(): not enough data in file");
471 }
472
473 // check if this rank contributes anything
474 if (params.first[0] >= m_offset[0]+myN0 ||
475 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
476 params.first[1] >= m_offset[1]+myN1 ||
477 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
478 params.first[2] >= m_offset[2]+myN2 ||
479 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
480 f.close();
481 return;
482 }
483
484 // now determine how much this rank has to write
485
486 // first coordinates in data object to write to
487 const int first0 = max(0, params.first[0]-m_offset[0]);
488 const int first1 = max(0, params.first[1]-m_offset[1]);
489 const int first2 = max(0, params.first[2]-m_offset[2]);
490 // indices to first value in file
491 const int idx0 = max(0, m_offset[0]-params.first[0]);
492 const int idx1 = max(0, m_offset[1]-params.first[1]);
493 const int idx2 = max(0, m_offset[2]-params.first[2]);
494 // number of values to read
495 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
496 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
497 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
498
499 out.requireWrite();
500 vector<ValueType> values(num0*numComp);
501 const int dpp = out.getNumDataPointsPerSample();
502
503 for (int z=0; z<num2; z++) {
504 for (int y=0; y<num1; y++) {
505 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
506 +(idx2+z)*params.numValues[0]*params.numValues[1]);
507 f.seekg(fileofs*sizeof(ValueType));
508 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
509
510 for (int x=0; x<num0; x++) {
511 const int baseIndex = first0+x*params.multiplier[0]
512 +(first1+y*params.multiplier[1])*myN0
513 +(first2+z*params.multiplier[2])*myN0*myN1;
514 for (int m2=0; m2<params.multiplier[2]; m2++) {
515 for (int m1=0; m1<params.multiplier[1]; m1++) {
516 for (int m0=0; m0<params.multiplier[0]; m0++) {
517 const int dataIndex = baseIndex+m0
518 +m1*myN0
519 +m2*myN0*myN1;
520 double* dest = out.getSampleDataRW(dataIndex);
521 for (int c=0; c<numComp; c++) {
522 ValueType val = values[x*numComp+c];
523
524 if (params.byteOrder != BYTEORDER_NATIVE) {
525 char* cval = reinterpret_cast<char*>(&val);
526 // this will alter val!!
527 byte_swap32(cval);
528 }
529 if (!std::isnan(val)) {
530 for (int q=0; q<dpp; q++) {
531 *dest++ = static_cast<double>(val);
532 }
533 }
534 }
535 }
536 }
537 }
538 }
539 }
540 }
541
542 f.close();
543 }
544
545 #ifdef USE_BOOSTIO
546 template<typename ValueType>
547 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
548 const ReaderParameters& params) const
549 {
550 // check destination function space
551 int myN0, myN1, myN2;
552 if (out.getFunctionSpace().getTypeCode() == Nodes) {
553 myN0 = m_NN[0];
554 myN1 = m_NN[1];
555 myN2 = m_NN[2];
556 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
557 out.getFunctionSpace().getTypeCode() == ReducedElements) {
558 myN0 = m_NE[0];
559 myN1 = m_NE[1];
560 myN2 = m_NE[2];
561 } else
562 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
563
564 if (params.first.size() != 3)
565 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
566
567 if (params.numValues.size() != 3)
568 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
569
570 if (params.multiplier.size() != 3)
571 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
572 for (size_t i=0; i<params.multiplier.size(); i++)
573 if (params.multiplier[i]<1)
574 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
575
576 // check file existence and size
577 ifstream f(filename.c_str(), ifstream::binary);
578 if (f.fail()) {
579 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
580 }
581 f.seekg(0, ios::end);
582 const int numComp = out.getDataPointSize();
583 int filesize = f.tellg();
584 f.seekg(0, ios::beg);
585 std::vector<char> compressed(filesize);
586 f.read((char*)&compressed[0], filesize);
587 f.close();
588 std::vector<char> decompressed = unzip(compressed);
589 filesize = decompressed.size();
590 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
591 if (filesize < reqsize) {
592 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
593 }
594
595 // check if this rank contributes anything
596 if (params.first[0] >= m_offset[0]+myN0 ||
597 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
598 params.first[1] >= m_offset[1]+myN1 ||
599 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
600 params.first[2] >= m_offset[2]+myN2 ||
601 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
602 return;
603 }
604
605 // now determine how much this rank has to write
606
607 // first coordinates in data object to write to
608 const int first0 = max(0, params.first[0]-m_offset[0]);
609 const int first1 = max(0, params.first[1]-m_offset[1]);
610 const int first2 = max(0, params.first[2]-m_offset[2]);
611 // indices to first value in file
612 const int idx0 = max(0, m_offset[0]-params.first[0]);
613 const int idx1 = max(0, m_offset[1]-params.first[1]);
614 const int idx2 = max(0, m_offset[2]-params.first[2]);
615 // number of values to read
616 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
617 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
618 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
619
620 out.requireWrite();
621 vector<ValueType> values(num0*numComp);
622 const int dpp = out.getNumDataPointsPerSample();
623
624 for (int z=0; z<num2; z++) {
625 for (int y=0; y<num1; y++) {
626 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
627 +(idx2+z)*params.numValues[0]*params.numValues[1]);
628 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
629
630 for (int x=0; x<num0; x++) {
631 const int baseIndex = first0+x*params.multiplier[0]
632 +(first1+y*params.multiplier[1])*myN0
633 +(first2+z*params.multiplier[2])*myN0*myN1;
634 for (int m2=0; m2<params.multiplier[2]; m2++) {
635 for (int m1=0; m1<params.multiplier[1]; m1++) {
636 for (int m0=0; m0<params.multiplier[0]; m0++) {
637 const int dataIndex = baseIndex+m0
638 +m1*myN0
639 +m2*myN0*myN1;
640 double* dest = out.getSampleDataRW(dataIndex);
641 for (int c=0; c<numComp; c++) {
642 ValueType val = values[x*numComp+c];
643
644 if (params.byteOrder != BYTEORDER_NATIVE) {
645 char* cval = reinterpret_cast<char*>(&val);
646 // this will alter val!!
647 byte_swap32(cval);
648 }
649 if (!std::isnan(val)) {
650 for (int q=0; q<dpp; q++) {
651 *dest++ = static_cast<double>(val);
652 }
653 }
654 }
655 }
656 }
657 }
658 }
659 }
660 }
661 }
662 #endif
663
664 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
665 int byteOrder, int dataType) const
666 {
667 // the mapping is not universally correct but should work on our
668 // supported platforms
669 switch (dataType) {
670 case DATATYPE_INT32:
671 writeBinaryGridImpl<int>(in, filename, byteOrder);
672 break;
673 case DATATYPE_FLOAT32:
674 writeBinaryGridImpl<float>(in, filename, byteOrder);
675 break;
676 case DATATYPE_FLOAT64:
677 writeBinaryGridImpl<double>(in, filename, byteOrder);
678 break;
679 default:
680 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
681 }
682 }
683
684 template<typename ValueType>
685 void Brick::writeBinaryGridImpl(const escript::Data& in,
686 const string& filename, int byteOrder) const
687 {
688 // check function space and determine number of points
689 int myN0, myN1, myN2;
690 int totalN0, totalN1, totalN2;
691 if (in.getFunctionSpace().getTypeCode() == Nodes) {
692 myN0 = m_NN[0];
693 myN1 = m_NN[1];
694 myN2 = m_NN[2];
695 totalN0 = m_gNE[0]+1;
696 totalN1 = m_gNE[1]+1;
697 totalN2 = m_gNE[2]+1;
698 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
699 in.getFunctionSpace().getTypeCode() == ReducedElements) {
700 myN0 = m_NE[0];
701 myN1 = m_NE[1];
702 myN2 = m_NE[2];
703 totalN0 = m_gNE[0];
704 totalN1 = m_gNE[1];
705 totalN2 = m_gNE[2];
706 } else
707 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
708
709 const int numComp = in.getDataPointSize();
710 const int dpp = in.getNumDataPointsPerSample();
711 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
712
713 if (numComp > 1 || dpp > 1)
714 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
715
716 // from here on we know that each sample consists of one value
717 FileWriter fw;
718 fw.openFile(filename, fileSize);
719 MPIBarrier();
720
721 for (index_t z=0; z<myN2; z++) {
722 for (index_t y=0; y<myN1; y++) {
723 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
724 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
725 ostringstream oss;
726
727 for (index_t x=0; x<myN0; x++) {
728 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
729 ValueType fvalue = static_cast<ValueType>(*sample);
730 if (byteOrder == BYTEORDER_NATIVE) {
731 oss.write((char*)&fvalue, sizeof(fvalue));
732 } else {
733 char* value = reinterpret_cast<char*>(&fvalue);
734 oss.write(byte_swap32(value), sizeof(fvalue));
735 }
736 }
737 fw.writeAt(oss, fileofs);
738 }
739 }
740 fw.close();
741 }
742
743 void Brick::dump(const string& fileName) const
744 {
745 #if USE_SILO
746 string fn(fileName);
747 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
748 fn+=".silo";
749 }
750
751 int driver=DB_HDF5;
752 string siloPath;
753 DBfile* dbfile = NULL;
754
755 #ifdef ESYS_MPI
756 PMPIO_baton_t* baton = NULL;
757 const int NUM_SILO_FILES = 1;
758 const char* blockDirFmt = "/block%04d";
759 #endif
760
761 if (m_mpiInfo->size > 1) {
762 #ifdef ESYS_MPI
763 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
764 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
765 PMPIO_DefaultClose, (void*)&driver);
766 // try the fallback driver in case of error
767 if (!baton && driver != DB_PDB) {
768 driver = DB_PDB;
769 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
770 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
771 PMPIO_DefaultClose, (void*)&driver);
772 }
773 if (baton) {
774 char str[64];
775 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
776 siloPath = str;
777 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
778 }
779 #endif
780 } else {
781 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
782 getDescription().c_str(), driver);
783 // try the fallback driver in case of error
784 if (!dbfile && driver != DB_PDB) {
785 driver = DB_PDB;
786 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
787 getDescription().c_str(), driver);
788 }
789 }
790
791 if (!dbfile)
792 throw RipleyException("dump: Could not create Silo file");
793
794 /*
795 if (driver==DB_HDF5) {
796 // gzip level 1 already provides good compression with minimal
797 // performance penalty. Some tests showed that gzip levels >3 performed
798 // rather badly on escript data both in terms of time and space
799 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
800 }
801 */
802
803 boost::scoped_ptr<double> x(new double[m_NN[0]]);
804 boost::scoped_ptr<double> y(new double[m_NN[1]]);
805 boost::scoped_ptr<double> z(new double[m_NN[2]]);
806 double* coords[3] = { x.get(), y.get(), z.get() };
807 #pragma omp parallel
808 {
809 #pragma omp for
810 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
811 coords[0][i0]=getLocalCoordinate(i0, 0);
812 }
813 #pragma omp for
814 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
815 coords[1][i1]=getLocalCoordinate(i1, 1);
816 }
817 #pragma omp for
818 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
819 coords[2][i2]=getLocalCoordinate(i2, 2);
820 }
821 }
822 int* dims = const_cast<int*>(getNumNodesPerDim());
823
824 // write mesh
825 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
826 DB_COLLINEAR, NULL);
827
828 // write node ids
829 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
830 NULL, 0, DB_INT, DB_NODECENT, NULL);
831
832 // write element ids
833 dims = const_cast<int*>(getNumElementsPerDim());
834 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
835 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
836
837 // rank 0 writes multimesh and multivar
838 if (m_mpiInfo->rank == 0) {
839 vector<string> tempstrings;
840 vector<char*> names;
841 for (dim_t i=0; i<m_mpiInfo->size; i++) {
842 stringstream path;
843 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
844 tempstrings.push_back(path.str());
845 names.push_back((char*)tempstrings.back().c_str());
846 }
847 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
848 DBSetDir(dbfile, "/");
849 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
850 &types[0], NULL);
851 tempstrings.clear();
852 names.clear();
853 for (dim_t i=0; i<m_mpiInfo->size; i++) {
854 stringstream path;
855 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
856 tempstrings.push_back(path.str());
857 names.push_back((char*)tempstrings.back().c_str());
858 }
859 types.assign(m_mpiInfo->size, DB_QUADVAR);
860 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
861 &types[0], NULL);
862 tempstrings.clear();
863 names.clear();
864 for (dim_t i=0; i<m_mpiInfo->size; i++) {
865 stringstream path;
866 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
867 tempstrings.push_back(path.str());
868 names.push_back((char*)tempstrings.back().c_str());
869 }
870 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
871 &types[0], NULL);
872 }
873
874 if (m_mpiInfo->size > 1) {
875 #ifdef ESYS_MPI
876 PMPIO_HandOffBaton(baton, dbfile);
877 PMPIO_Finish(baton);
878 #endif
879 } else {
880 DBClose(dbfile);
881 }
882
883 #else // USE_SILO
884 throw RipleyException("dump: no Silo support");
885 #endif
886 }
887
888 const int* Brick::borrowSampleReferenceIDs(int fsType) const
889 {
890 switch (fsType) {
891 case Nodes:
892 case ReducedNodes: //FIXME: reduced
893 return &m_nodeId[0];
894 case DegreesOfFreedom:
895 case ReducedDegreesOfFreedom: //FIXME: reduced
896 return &m_dofId[0];
897 case Elements:
898 case ReducedElements:
899 return &m_elementId[0];
900 case FaceElements:
901 case ReducedFaceElements:
902 return &m_faceId[0];
903 case Points:
904 return &m_diracPointNodeIDs[0];
905 default:
906 break;
907 }
908
909 stringstream msg;
910 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
911 throw RipleyException(msg.str());
912 }
913
914 bool Brick::ownSample(int fsType, index_t id) const
915 {
916 if (getMPISize()==1)
917 return true;
918
919 switch (fsType) {
920 case Nodes:
921 case ReducedNodes: //FIXME: reduced
922 return (m_dofMap[id] < getNumDOF());
923 case DegreesOfFreedom:
924 case ReducedDegreesOfFreedom:
925 return true;
926 case Elements:
927 case ReducedElements:
928 {
929 // check ownership of element's _last_ node
930 const index_t x=id%m_NE[0] + 1;
931 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
932 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
933 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
934 }
935 case FaceElements:
936 case ReducedFaceElements:
937 {
938 // check ownership of face element's last node
939 dim_t n=0;
940 for (size_t i=0; i<6; i++) {
941 n+=m_faceCount[i];
942 if (id<n) {
943 const index_t j=id-n+m_faceCount[i];
944 if (i>=4) { // front or back
945 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
946 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
947 } else if (i>=2) { // bottom or top
948 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
949 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
950 } else { // left or right
951 const index_t first=(i==0 ? 0 : m_NN[0]-1);
952 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
953 }
954 }
955 }
956 return false;
957 }
958 default:
959 break;
960 }
961
962 stringstream msg;
963 msg << "ownSample: invalid function space type " << fsType;
964 throw RipleyException(msg.str());
965 }
966
967 void Brick::setToNormal(escript::Data& out) const
968 {
969 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
970 out.requireWrite();
971 #pragma omp parallel
972 {
973 if (m_faceOffset[0] > -1) {
974 #pragma omp for nowait
975 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
976 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
977 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
978 // set vector at four quadrature points
979 *o++ = -1.; *o++ = 0.; *o++ = 0.;
980 *o++ = -1.; *o++ = 0.; *o++ = 0.;
981 *o++ = -1.; *o++ = 0.; *o++ = 0.;
982 *o++ = -1.; *o++ = 0.; *o = 0.;
983 }
984 }
985 }
986
987 if (m_faceOffset[1] > -1) {
988 #pragma omp for nowait
989 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
990 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
991 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
992 // set vector at four quadrature points
993 *o++ = 1.; *o++ = 0.; *o++ = 0.;
994 *o++ = 1.; *o++ = 0.; *o++ = 0.;
995 *o++ = 1.; *o++ = 0.; *o++ = 0.;
996 *o++ = 1.; *o++ = 0.; *o = 0.;
997 }
998 }
999 }
1000
1001 if (m_faceOffset[2] > -1) {
1002 #pragma omp for nowait
1003 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1004 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1005 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1006 // set vector at four quadrature points
1007 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1010 *o++ = 0.; *o++ = -1.; *o = 0.;
1011 }
1012 }
1013 }
1014
1015 if (m_faceOffset[3] > -1) {
1016 #pragma omp for nowait
1017 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1018 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1019 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1020 // set vector at four quadrature points
1021 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1024 *o++ = 0.; *o++ = 1.; *o = 0.;
1025 }
1026 }
1027 }
1028
1029 if (m_faceOffset[4] > -1) {
1030 #pragma omp for nowait
1031 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1032 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1033 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1034 // set vector at four quadrature points
1035 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1038 *o++ = 0.; *o++ = 0.; *o = -1.;
1039 }
1040 }
1041 }
1042
1043 if (m_faceOffset[5] > -1) {
1044 #pragma omp for nowait
1045 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1046 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1047 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1048 // set vector at four quadrature points
1049 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1052 *o++ = 0.; *o++ = 0.; *o = 1.;
1053 }
1054 }
1055 }
1056 } // end of parallel section
1057 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1058 out.requireWrite();
1059 #pragma omp parallel
1060 {
1061 if (m_faceOffset[0] > -1) {
1062 #pragma omp for nowait
1063 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1064 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1065 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1066 *o++ = -1.;
1067 *o++ = 0.;
1068 *o = 0.;
1069 }
1070 }
1071 }
1072
1073 if (m_faceOffset[1] > -1) {
1074 #pragma omp for nowait
1075 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1076 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1077 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1078 *o++ = 1.;
1079 *o++ = 0.;
1080 *o = 0.;
1081 }
1082 }
1083 }
1084
1085 if (m_faceOffset[2] > -1) {
1086 #pragma omp for nowait
1087 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1088 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1089 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1090 *o++ = 0.;
1091 *o++ = -1.;
1092 *o = 0.;
1093 }
1094 }
1095 }
1096
1097 if (m_faceOffset[3] > -1) {
1098 #pragma omp for nowait
1099 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1100 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1101 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1102 *o++ = 0.;
1103 *o++ = 1.;
1104 *o = 0.;
1105 }
1106 }
1107 }
1108
1109 if (m_faceOffset[4] > -1) {
1110 #pragma omp for nowait
1111 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1112 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1113 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1114 *o++ = 0.;
1115 *o++ = 0.;
1116 *o = -1.;
1117 }
1118 }
1119 }
1120
1121 if (m_faceOffset[5] > -1) {
1122 #pragma omp for nowait
1123 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1124 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1125 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1126 *o++ = 0.;
1127 *o++ = 0.;
1128 *o = 1.;
1129 }
1130 }
1131 }
1132 } // end of parallel section
1133
1134 } else {
1135 stringstream msg;
1136 msg << "setToNormal: invalid function space type "
1137 << out.getFunctionSpace().getTypeCode();
1138 throw RipleyException(msg.str());
1139 }
1140 }
1141
1142 void Brick::setToSize(escript::Data& out) const
1143 {
1144 if (out.getFunctionSpace().getTypeCode() == Elements
1145 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1146 out.requireWrite();
1147 const dim_t numQuad=out.getNumDataPointsPerSample();
1148 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1149 #pragma omp parallel for
1150 for (index_t k = 0; k < getNumElements(); ++k) {
1151 double* o = out.getSampleDataRW(k);
1152 fill(o, o+numQuad, size);
1153 }
1154 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1155 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1156 out.requireWrite();
1157 const dim_t numQuad=out.getNumDataPointsPerSample();
1158 #pragma omp parallel
1159 {
1160 if (m_faceOffset[0] > -1) {
1161 const double size=min(m_dx[1],m_dx[2]);
1162 #pragma omp for nowait
1163 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1164 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1165 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1166 fill(o, o+numQuad, size);
1167 }
1168 }
1169 }
1170
1171 if (m_faceOffset[1] > -1) {
1172 const double size=min(m_dx[1],m_dx[2]);
1173 #pragma omp for nowait
1174 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1175 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1176 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1177 fill(o, o+numQuad, size);
1178 }
1179 }
1180 }
1181
1182 if (m_faceOffset[2] > -1) {
1183 const double size=min(m_dx[0],m_dx[2]);
1184 #pragma omp for nowait
1185 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1186 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1187 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1188 fill(o, o+numQuad, size);
1189 }
1190 }
1191 }
1192
1193 if (m_faceOffset[3] > -1) {
1194 const double size=min(m_dx[0],m_dx[2]);
1195 #pragma omp for nowait
1196 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1197 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1198 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1199 fill(o, o+numQuad, size);
1200 }
1201 }
1202 }
1203
1204 if (m_faceOffset[4] > -1) {
1205 const double size=min(m_dx[0],m_dx[1]);
1206 #pragma omp for nowait
1207 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1208 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1209 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1210 fill(o, o+numQuad, size);
1211 }
1212 }
1213 }
1214
1215 if (m_faceOffset[5] > -1) {
1216 const double size=min(m_dx[0],m_dx[1]);
1217 #pragma omp for nowait
1218 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1219 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1220 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1221 fill(o, o+numQuad, size);
1222 }
1223 }
1224 }
1225 } // end of parallel section
1226
1227 } else {
1228 stringstream msg;
1229 msg << "setToSize: invalid function space type "
1230 << out.getFunctionSpace().getTypeCode();
1231 throw RipleyException(msg.str());
1232 }
1233 }
1234
1235 void Brick::Print_Mesh_Info(const bool full) const
1236 {
1237 RipleyDomain::Print_Mesh_Info(full);
1238 if (full) {
1239 cout << " Id Coordinates" << endl;
1240 cout.precision(15);
1241 cout.setf(ios::scientific, ios::floatfield);
1242 for (index_t i=0; i < getNumNodes(); i++) {
1243 cout << " " << setw(5) << m_nodeId[i]
1244 << " " << getLocalCoordinate(i%m_NN[0], 0)
1245 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1246 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1247 }
1248 }
1249 }
1250
1251
1252 //protected
1253 void Brick::assembleCoordinates(escript::Data& arg) const
1254 {
1255 escriptDataC x = arg.getDataC();
1256 int numDim = m_numDim;
1257 if (!isDataPointShapeEqual(&x, 1, &numDim))
1258 throw RipleyException("setToX: Invalid Data object shape");
1259 if (!numSamplesEqual(&x, 1, getNumNodes()))
1260 throw RipleyException("setToX: Illegal number of samples in Data object");
1261
1262 arg.requireWrite();
1263 #pragma omp parallel for
1264 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1265 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1266 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1267 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1268 point[0] = getLocalCoordinate(i0, 0);
1269 point[1] = getLocalCoordinate(i1, 1);
1270 point[2] = getLocalCoordinate(i2, 2);
1271 }
1272 }
1273 }
1274 }
1275
1276 //protected
1277 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1278 {
1279 const dim_t numComp = in.getDataPointSize();
1280 const double C0 = .044658198738520451079;
1281 const double C1 = .16666666666666666667;
1282 const double C2 = .21132486540518711775;
1283 const double C3 = .25;
1284 const double C4 = .5;
1285 const double C5 = .62200846792814621559;
1286 const double C6 = .78867513459481288225;
1287
1288 if (out.getFunctionSpace().getTypeCode() == Elements) {
1289 out.requireWrite();
1290 #pragma omp parallel
1291 {
1292 vector<double> f_000(numComp);
1293 vector<double> f_001(numComp);
1294 vector<double> f_010(numComp);
1295 vector<double> f_011(numComp);
1296 vector<double> f_100(numComp);
1297 vector<double> f_101(numComp);
1298 vector<double> f_110(numComp);
1299 vector<double> f_111(numComp);
1300 #pragma omp for
1301 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1302 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1303 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1304 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1305 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1306 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1307 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1308 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1309 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1310 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1311 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1312 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1313 for (index_t i=0; i < numComp; ++i) {
1314 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1315 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1316 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1317 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1318 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1319 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1320 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1321 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1322 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1323 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1324 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1325 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1326 o[INDEX3(i,0,0,numComp,3)] = V0;
1327 o[INDEX3(i,1,0,numComp,3)] = V4;
1328 o[INDEX3(i,2,0,numComp,3)] = V8;
1329 o[INDEX3(i,0,1,numComp,3)] = V0;
1330 o[INDEX3(i,1,1,numComp,3)] = V5;
1331 o[INDEX3(i,2,1,numComp,3)] = V9;
1332 o[INDEX3(i,0,2,numComp,3)] = V1;
1333 o[INDEX3(i,1,2,numComp,3)] = V4;
1334 o[INDEX3(i,2,2,numComp,3)] = V10;
1335 o[INDEX3(i,0,3,numComp,3)] = V1;
1336 o[INDEX3(i,1,3,numComp,3)] = V5;
1337 o[INDEX3(i,2,3,numComp,3)] = V11;
1338 o[INDEX3(i,0,4,numComp,3)] = V2;
1339 o[INDEX3(i,1,4,numComp,3)] = V6;
1340 o[INDEX3(i,2,4,numComp,3)] = V8;
1341 o[INDEX3(i,0,5,numComp,3)] = V2;
1342 o[INDEX3(i,1,5,numComp,3)] = V7;
1343 o[INDEX3(i,2,5,numComp,3)] = V9;
1344 o[INDEX3(i,0,6,numComp,3)] = V3;
1345 o[INDEX3(i,1,6,numComp,3)] = V6;
1346 o[INDEX3(i,2,6,numComp,3)] = V10;
1347 o[INDEX3(i,0,7,numComp,3)] = V3;
1348 o[INDEX3(i,1,7,numComp,3)] = V7;
1349 o[INDEX3(i,2,7,numComp,3)] = V11;
1350 } // end of component loop i
1351 } // end of k0 loop
1352 } // end of k1 loop
1353 } // end of k2 loop
1354 } // end of parallel section
1355 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1356 out.requireWrite();
1357 #pragma omp parallel
1358 {
1359 vector<double> f_000(numComp);
1360 vector<double> f_001(numComp);
1361 vector<double> f_010(numComp);
1362 vector<double> f_011(numComp);
1363 vector<double> f_100(numComp);
1364 vector<double> f_101(numComp);
1365 vector<double> f_110(numComp);
1366 vector<double> f_111(numComp);
1367 #pragma omp for
1368 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1369 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1370 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1371 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1378 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1379 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1380 for (index_t i=0; i < numComp; ++i) {
1381 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1382 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1383 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1384 } // end of component loop i
1385 } // end of k0 loop
1386 } // end of k1 loop
1387 } // end of k2 loop
1388 } // end of parallel section
1389 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1390 out.requireWrite();
1391 #pragma omp parallel
1392 {
1393 vector<double> f_000(numComp);
1394 vector<double> f_001(numComp);
1395 vector<double> f_010(numComp);
1396 vector<double> f_011(numComp);
1397 vector<double> f_100(numComp);
1398 vector<double> f_101(numComp);
1399 vector<double> f_110(numComp);
1400 vector<double> f_111(numComp);
1401 if (m_faceOffset[0] > -1) {
1402 #pragma omp for nowait
1403 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1404 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1405 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1406 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1407 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1408 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1409 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1410 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1411 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1412 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1413 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1414 for (index_t i=0; i < numComp; ++i) {
1415 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1416 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1417 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1418 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1419 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1420 o[INDEX3(i,1,0,numComp,3)] = V0;
1421 o[INDEX3(i,2,0,numComp,3)] = V2;
1422 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1423 o[INDEX3(i,1,1,numComp,3)] = V0;
1424 o[INDEX3(i,2,1,numComp,3)] = V3;
1425 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1426 o[INDEX3(i,1,2,numComp,3)] = V1;
1427 o[INDEX3(i,2,2,numComp,3)] = V2;
1428 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1429 o[INDEX3(i,1,3,numComp,3)] = V1;
1430 o[INDEX3(i,2,3,numComp,3)] = V3;
1431 } // end of component loop i
1432 } // end of k1 loop
1433 } // end of k2 loop
1434 } // end of face 0
1435 if (m_faceOffset[1] > -1) {
1436 #pragma omp for nowait
1437 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1438 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1439 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1445 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1446 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1447 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1448 for (index_t i=0; i < numComp; ++i) {
1449 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1450 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1451 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1452 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1453 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1454 o[INDEX3(i,1,0,numComp,3)] = V0;
1455 o[INDEX3(i,2,0,numComp,3)] = V2;
1456 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1457 o[INDEX3(i,1,1,numComp,3)] = V0;
1458 o[INDEX3(i,2,1,numComp,3)] = V3;
1459 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1460 o[INDEX3(i,1,2,numComp,3)] = V1;
1461 o[INDEX3(i,2,2,numComp,3)] = V2;
1462 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1463 o[INDEX3(i,1,3,numComp,3)] = V1;
1464 o[INDEX3(i,2,3,numComp,3)] = V3;
1465 } // end of component loop i
1466 } // end of k1 loop
1467 } // end of k2 loop
1468 } // end of face 1
1469 if (m_faceOffset[2] > -1) {
1470 #pragma omp for nowait
1471 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1472 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1473 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1479 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1480 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1481 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1482 for (index_t i=0; i < numComp; ++i) {
1483 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1484 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1485 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1486 o[INDEX3(i,0,0,numComp,3)] = V0;
1487 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1488 o[INDEX3(i,2,0,numComp,3)] = V1;
1489 o[INDEX3(i,0,1,numComp,3)] = V0;
1490 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1491 o[INDEX3(i,2,1,numComp,3)] = V2;
1492 o[INDEX3(i,0,2,numComp,3)] = V0;
1493 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1494 o[INDEX3(i,2,2,numComp,3)] = V1;
1495 o[INDEX3(i,0,3,numComp,3)] = V0;
1496 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1497 o[INDEX3(i,2,3,numComp,3)] = V2;
1498 } // end of component loop i
1499 } // end of k0 loop
1500 } // end of k2 loop
1501 } // end of face 2
1502 if (m_faceOffset[3] > -1) {
1503 #pragma omp for nowait
1504 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1505 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1506 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1515 for (index_t i=0; i < numComp; ++i) {
1516 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1517 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1518 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1519 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1520 o[INDEX3(i,0,0,numComp,3)] = V0;
1521 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1522 o[INDEX3(i,2,0,numComp,3)] = V2;
1523 o[INDEX3(i,0,1,numComp,3)] = V0;
1524 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1525 o[INDEX3(i,2,1,numComp,3)] = V3;
1526 o[INDEX3(i,0,2,numComp,3)] = V1;
1527 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1528 o[INDEX3(i,2,2,numComp,3)] = V2;
1529 o[INDEX3(i,0,3,numComp,3)] = V1;
1530 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1531 o[INDEX3(i,2,3,numComp,3)] = V3;
1532 } // end of component loop i
1533 } // end of k0 loop
1534 } // end of k2 loop
1535 } // end of face 3
1536 if (m_faceOffset[4] > -1) {
1537 #pragma omp for nowait
1538 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1539 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1540 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1549 for (index_t i=0; i < numComp; ++i) {
1550 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1551 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1552 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1553 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1554 o[INDEX3(i,0,0,numComp,3)] = V0;
1555 o[INDEX3(i,1,0,numComp,3)] = V2;
1556 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1557 o[INDEX3(i,0,1,numComp,3)] = V0;
1558 o[INDEX3(i,1,1,numComp,3)] = V3;
1559 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1560 o[INDEX3(i,0,2,numComp,3)] = V1;
1561 o[INDEX3(i,1,2,numComp,3)] = V2;
1562 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1563 o[INDEX3(i,0,3,numComp,3)] = V1;
1564 o[INDEX3(i,1,3,numComp,3)] = V3;
1565 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1566 } // end of component loop i
1567 } // end of k0 loop
1568 } // end of k1 loop
1569 } // end of face 4
1570 if (m_faceOffset[5] > -1) {
1571 #pragma omp for nowait
1572 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1573 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1574 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1583 for (index_t i=0; i < numComp; ++i) {
1584 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1585 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1586 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1587 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1588 o[INDEX3(i,0,0,numComp,3)] = V0;
1589 o[INDEX3(i,1,0,numComp,3)] = V2;
1590 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1591 o[INDEX3(i,0,1,numComp,3)] = V0;
1592 o[INDEX3(i,1,1,numComp,3)] = V3;
1593 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1594 o[INDEX3(i,0,2,numComp,3)] = V1;
1595 o[INDEX3(i,1,2,numComp,3)] = V2;
1596 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1597 o[INDEX3(i,0,3,numComp,3)] = V1;
1598 o[INDEX3(i,1,3,numComp,3)] = V3;
1599 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1600 } // end of component loop i
1601 } // end of k0 loop
1602 } // end of k1 loop
1603 } // end of face 5
1604 } // end of parallel section
1605 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1606 out.requireWrite();
1607 #pragma omp parallel
1608 {
1609 vector<double> f_000(numComp);
1610 vector<double> f_001(numComp);
1611 vector<double> f_010(numComp);
1612 vector<double> f_011(numComp);
1613 vector<double> f_100(numComp);
1614 vector<double> f_101(numComp);
1615 vector<double> f_110(numComp);
1616 vector<double> f_111(numComp);
1617 if (m_faceOffset[0] > -1) {
1618 #pragma omp for nowait
1619 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1620 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1621 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1622 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1623 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1624 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1625 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1626 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1627 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1628 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1629 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1630 for (index_t i=0; i < numComp; ++i) {
1631 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1632 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1633 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1634 } // end of component loop i
1635 } // end of k1 loop
1636 } // end of k2 loop
1637 } // end of face 0
1638 if (m_faceOffset[1] > -1) {
1639 #pragma omp for nowait
1640 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1641 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1642 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1643 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1644 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1645 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1646 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1647 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1648 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1649 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1650 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1651 for (index_t i=0; i < numComp; ++i) {
1652 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1653 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1654 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1655 } // end of component loop i
1656 } // end of k1 loop
1657 } // end of k2 loop
1658 } // end of face 1
1659 if (m_faceOffset[2] > -1) {
1660 #pragma omp for nowait
1661 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1662 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1663 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1664 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1665 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1666 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1667 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1668 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1669 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1670 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1671 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1672 for (index_t i=0; i < numComp; ++i) {
1673 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1674 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1675 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1676 } // end of component loop i
1677 } // end of k0 loop
1678 } // end of k2 loop
1679 } // end of face 2
1680 if (m_faceOffset[3] > -1) {
1681 #pragma omp for nowait
1682 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1683 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1684 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1685 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1686 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1687 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1688 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1689 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1690 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1691 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1692 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1693 for (index_t i=0; i < numComp; ++i) {
1694 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1695 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1696 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1697 } // end of component loop i
1698 } // end of k0 loop
1699 } // end of k2 loop
1700 } // end of face 3
1701 if (m_faceOffset[4] > -1) {
1702 #pragma omp for nowait
1703 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1704 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1705 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1706 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1707 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1708 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1709 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1710 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1711 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1712 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1713 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1714 for (index_t i=0; i < numComp; ++i) {
1715 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1716 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1717 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1718 } // end of component loop i
1719 } // end of k0 loop
1720 } // end of k1 loop
1721 } // end of face 4
1722 if (m_faceOffset[5] > -1) {
1723 #pragma omp for nowait
1724 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1725 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1726 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1727 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1728 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1729 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1730 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1731 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1732 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1733 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1734 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1735 for (index_t i=0; i < numComp; ++i) {
1736 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1737 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1738 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1739 } // end of component loop i
1740 } // end of k0 loop
1741 } // end of k1 loop
1742 } // end of face 5
1743 } // end of parallel section
1744 }
1745 }
1746
1747 //protected
1748 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1749 {
1750 const dim_t numComp = arg.getDataPointSize();
1751 const index_t left = (m_offset[0]==0 ? 0 : 1);
1752 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1753 const index_t front = (m_offset[2]==0 ? 0 : 1);
1754 const int fs = arg.getFunctionSpace().getTypeCode();
1755 if (fs == Elements && arg.actsExpanded()) {
1756 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1757 #pragma omp parallel
1758 {
1759 vector<double> int_local(numComp, 0);
1760 #pragma omp for nowait
1761 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1762 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1763 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1764 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1765 for (index_t i=0; i < numComp; ++i) {
1766 const double f_0 = f[INDEX2(i,0,numComp)];
1767 const double f_1 = f[INDEX2(i,1,numComp)];
1768 const double f_2 = f[INDEX2(i,2,numComp)];
1769 const double f_3 = f[INDEX2(i,3,numComp)];
1770 const double f_4 = f[INDEX2(i,4,numComp)];
1771 const double f_5 = f[INDEX2(i,5,numComp)];
1772 const double f_6 = f[INDEX2(i,6,numComp)];
1773 const double f_7 = f[INDEX2(i,7,numComp)];
1774 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1775 } // end of component loop i
1776 } // end of k0 loop
1777 } // end of k1 loop
1778 } // end of k2 loop
1779
1780 #pragma omp critical
1781 for (index_t i=0; i<numComp; i++)
1782 integrals[i]+=int_local[i];
1783 } // end of parallel section
1784
1785 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1786 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1787 #pragma omp parallel
1788 {
1789 vector<double> int_local(numComp, 0);
1790 #pragma omp for nowait
1791 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1792 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1793 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1794 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1795 for (index_t i=0; i < numComp; ++i) {
1796 int_local[i]+=f[i]*w_0;
1797 } // end of component loop i
1798 } // end of k0 loop
1799 } // end of k1 loop
1800 } // end of k2 loop
1801
1802 #pragma omp critical
1803 for (index_t i=0; i<numComp; i++)
1804 integrals[i]+=int_local[i];
1805 } // end of parallel section
1806
1807 } else if (fs == FaceElements && arg.actsExpanded()) {
1808 const double w_0 = m_dx[1]*m_dx[2]/4.;
1809 const double w_1 = m_dx[0]*m_dx[2]/4.;
1810 const double w_2 = m_dx[0]*m_dx[1]/4.;
1811 #pragma omp parallel
1812 {
1813 vector<double> int_local(numComp, 0);
1814 if (m_faceOffset[0] > -1) {
1815 #pragma omp for nowait
1816 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1817 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1818 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1819 for (index_t i=0