/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4851 - (show annotations)
Wed Apr 9 03:30:36 2014 UTC (5 years ago) by sshaw
Original Path: trunk/ripley/src/Brick.cpp
File size: 165810 byte(s)
added exceptions to ripley for more informative deaths to negative element counts
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <ripley/LameAssembler3D.h>
23 #include <ripley/domainhelpers.h>
24 #include <boost/scoped_array.hpp>
25
26 #ifdef USE_NETCDF
27 #include <netcdfcpp.h>
28 #endif
29
30 #if USE_SILO
31 #include <silo.h>
32 #ifdef ESYS_MPI
33 #include <pmpio.h>
34 #endif
35 #endif
36
37 #include <iomanip>
38
39 #include "esysUtils/EsysRandom.h"
40 #include "blocktools.h"
41
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 int indexOfMax(int a, int b, int c) {
49 if (a > b) {
50 if (c > a) {
51 return 2;
52 }
53 return 0;
54 } else if (b > c) {
55 return 1;
56 }
57 return 2;
58 }
59
60 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 double x1, double y1, double z1, int d0, int d1, int d2,
62 const std::vector<double>& points, const std::vector<int>& tags,
63 const simap_t& tagnamestonums) :
64 RipleyDomain(3)
65 {
66 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
67 throw RipleyException("Number of elements in each spatial dimension "
68 "must be positive");
69
70 // ignore subdivision parameters for serial run
71 if (m_mpiInfo->size == 1) {
72 d0=1;
73 d1=1;
74 d2=1;
75 }
76 bool warn=false;
77
78 std::vector<int> factors;
79 int ranks = m_mpiInfo->size;
80 int epr[3] = {n0,n1,n2};
81 int d[3] = {d0,d1,d2};
82 if (d0<=0 || d1<=0 || d2<=0) {
83 for (int i = 0; i < 3; i++) {
84 if (d[i] < 1) {
85 d[i] = 1;
86 continue;
87 }
88 epr[i] = -1; // can no longer be max
89 if (ranks % d[i] != 0) {
90 throw RipleyException("Invalid number of spatial subdivisions");
91 }
92 //remove
93 ranks /= d[i];
94 }
95 factorise(factors, ranks);
96 if (factors.size() != 0) {
97 warn = true;
98 }
99 }
100 while (factors.size() > 0) {
101 int i = indexOfMax(epr[0],epr[1],epr[2]);
102 int f = factors.back();
103 factors.pop_back();
104 d[i] *= f;
105 epr[i] /= f;
106 }
107 d0 = d[0]; d1 = d[1]; d2 = d[2];
108
109 // ensure number of subdivisions is valid and nodes can be distributed
110 // among number of ranks
111 if (d0*d1*d2 != m_mpiInfo->size){
112 throw RipleyException("Invalid number of spatial subdivisions");
113 }
114 if (warn) {
115 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
116 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
117 }
118
119 double l0 = x1-x0;
120 double l1 = y1-y0;
121 double l2 = z1-z0;
122 m_dx[0] = l0/n0;
123 m_dx[1] = l1/n1;
124 m_dx[2] = l2/n2;
125
126 if ((n0+1)%d0 > 0) {
127 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
128 l0=m_dx[0]*n0;
129 cout << "Warning: Adjusted number of elements and length. N0="
130 << n0 << ", l0=" << l0 << endl;
131 }
132 if ((n1+1)%d1 > 0) {
133 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
134 l1=m_dx[1]*n1;
135 cout << "Warning: Adjusted number of elements and length. N1="
136 << n1 << ", l1=" << l1 << endl;
137 }
138 if ((n2+1)%d2 > 0) {
139 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
140 l2=m_dx[2]*n2;
141 cout << "Warning: Adjusted number of elements and length. N2="
142 << n2 << ", l2=" << l2 << endl;
143 }
144
145 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
146 throw RipleyException("Too few elements for the number of ranks");
147
148 m_gNE[0] = n0;
149 m_gNE[1] = n1;
150 m_gNE[2] = n2;
151 m_origin[0] = x0;
152 m_origin[1] = y0;
153 m_origin[2] = z0;
154 m_length[0] = l0;
155 m_length[1] = l1;
156 m_length[2] = l2;
157 m_NX[0] = d0;
158 m_NX[1] = d1;
159 m_NX[2] = d2;
160
161 // local number of elements (including overlap)
162 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
163 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
164 m_NE[0]++;
165 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
166 m_ownNE[0]--;
167
168 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
169 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
170 m_NE[1]++;
171 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
172 m_ownNE[1]--;
173
174 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
175 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
176 m_NE[2]++;
177 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
178 m_ownNE[2]--;
179
180 // local number of nodes
181 m_NN[0] = m_NE[0]+1;
182 m_NN[1] = m_NE[1]+1;
183 m_NN[2] = m_NE[2]+1;
184
185 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
186 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
187 if (m_offset[0] > 0)
188 m_offset[0]--;
189 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
190 if (m_offset[1] > 0)
191 m_offset[1]--;
192 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
193 if (m_offset[2] > 0)
194 m_offset[2]--;
195
196 populateSampleIds();
197 createPattern();
198
199 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
200 for (map<string, int>::const_iterator i = tagnamestonums.begin();
201 i != tagnamestonums.end(); i++) {
202 setTagMap(i->first, i->second);
203 }
204 addPoints(tags.size(), &points[0], &tags[0]);
205 }
206
207
208 Brick::~Brick()
209 {
210 delete assembler;
211 }
212
213 string Brick::getDescription() const
214 {
215 return "ripley::Brick";
216 }
217
218 bool Brick::operator==(const AbstractDomain& other) const
219 {
220 const Brick* o=dynamic_cast<const Brick*>(&other);
221 if (o) {
222 return (RipleyDomain::operator==(other) &&
223 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
224 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
225 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
226 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
227 }
228
229 return false;
230 }
231
232 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
233 const ReaderParameters& params) const
234 {
235 #ifdef USE_NETCDF
236 // check destination function space
237 int myN0, myN1, myN2;
238 if (out.getFunctionSpace().getTypeCode() == Nodes) {
239 myN0 = m_NN[0];
240 myN1 = m_NN[1];
241 myN2 = m_NN[2];
242 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
243 out.getFunctionSpace().getTypeCode() == ReducedElements) {
244 myN0 = m_NE[0];
245 myN1 = m_NE[1];
246 myN2 = m_NE[2];
247 } else
248 throw RipleyException("readNcGrid(): invalid function space for output data object");
249
250 if (params.first.size() != 3)
251 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
252
253 if (params.numValues.size() != 3)
254 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
255
256 if (params.multiplier.size() != 3)
257 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
258 for (size_t i=0; i<params.multiplier.size(); i++)
259 if (params.multiplier[i]<1)
260 throw RipleyException("readNcGrid(): all multipliers must be positive");
261
262 // check file existence and size
263 NcFile f(filename.c_str(), NcFile::ReadOnly);
264 if (!f.is_valid())
265 throw RipleyException("readNcGrid(): cannot open file");
266
267 NcVar* var = f.get_var(varname.c_str());
268 if (!var)
269 throw RipleyException("readNcGrid(): invalid variable name");
270
271 // TODO: rank>0 data support
272 const int numComp = out.getDataPointSize();
273 if (numComp > 1)
274 throw RipleyException("readNcGrid(): only scalar data supported");
275
276 const int dims = var->num_dims();
277 boost::scoped_array<long> edges(var->edges());
278
279 // is this a slice of the data object (dims!=3)?
280 // note the expected ordering of edges (as in numpy: z,y,x)
281 if ( (dims==3 && (params.numValues[2] > edges[0] ||
282 params.numValues[1] > edges[1] ||
283 params.numValues[0] > edges[2]))
284 || (dims==2 && params.numValues[2]>1)
285 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
286 throw RipleyException("readNcGrid(): not enough data in file");
287 }
288
289 // check if this rank contributes anything
290 if (params.first[0] >= m_offset[0]+myN0 ||
291 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
292 params.first[1] >= m_offset[1]+myN1 ||
293 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
294 params.first[2] >= m_offset[2]+myN2 ||
295 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
296 return;
297 }
298
299 // now determine how much this rank has to write
300
301 // first coordinates in data object to write to
302 const int first0 = max(0, params.first[0]-m_offset[0]);
303 const int first1 = max(0, params.first[1]-m_offset[1]);
304 const int first2 = max(0, params.first[2]-m_offset[2]);
305 // indices to first value in file (not accounting for reverse yet)
306 int idx0 = max(0, m_offset[0]-params.first[0]);
307 int idx1 = max(0, m_offset[1]-params.first[1]);
308 int idx2 = max(0, m_offset[2]-params.first[2]);
309 // number of values to read
310 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
311 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
312 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
313
314 // make sure we read the right block if going backwards through file
315 if (params.reverse[0])
316 idx0 = edges[dims-1]-num0-idx0;
317 if (dims>1 && params.reverse[1])
318 idx1 = edges[dims-2]-num1-idx1;
319 if (dims>2 && params.reverse[2])
320 idx2 = edges[dims-3]-num2-idx2;
321
322
323 vector<double> values(num0*num1*num2);
324 if (dims==3) {
325 var->set_cur(idx2, idx1, idx0);
326 var->get(&values[0], num2, num1, num0);
327 } else if (dims==2) {
328 var->set_cur(idx1, idx0);
329 var->get(&values[0], num1, num0);
330 } else {
331 var->set_cur(idx0);
332 var->get(&values[0], num0);
333 }
334
335 const int dpp = out.getNumDataPointsPerSample();
336 out.requireWrite();
337
338 // helpers for reversing
339 const int x0 = (params.reverse[0] ? num0-1 : 0);
340 const int x_mult = (params.reverse[0] ? -1 : 1);
341 const int y0 = (params.reverse[1] ? num1-1 : 0);
342 const int y_mult = (params.reverse[1] ? -1 : 1);
343 const int z0 = (params.reverse[2] ? num2-1 : 0);
344 const int z_mult = (params.reverse[2] ? -1 : 1);
345
346 for (index_t z=0; z<num2; z++) {
347 for (index_t y=0; y<num1; y++) {
348 #pragma omp parallel for
349 for (index_t x=0; x<num0; x++) {
350 const int baseIndex = first0+x*params.multiplier[0]
351 +(first1+y*params.multiplier[1])*myN0
352 +(first2+z*params.multiplier[2])*myN0*myN1;
353 const int srcIndex=(z0+z_mult*z)*num1*num0
354 +(y0+y_mult*y)*num0
355 +(x0+x_mult*x);
356 if (!isnan(values[srcIndex])) {
357 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
358 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
359 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
360 const int dataIndex = baseIndex+m0
361 +m1*myN0
362 +m2*myN0*myN1;
363 double* dest = out.getSampleDataRW(dataIndex);
364 for (index_t q=0; q<dpp; q++) {
365 *dest++ = values[srcIndex];
366 }
367 }
368 }
369 }
370 }
371 }
372 }
373 }
374 #else
375 throw RipleyException("readNcGrid(): not compiled with netCDF support");
376 #endif
377 }
378
379 #ifdef USE_BOOSTIO
380 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
381 const ReaderParameters& params) const
382 {
383 // the mapping is not universally correct but should work on our
384 // supported platforms
385 switch (params.dataType) {
386 case DATATYPE_INT32:
387 readBinaryGridZippedImpl<int>(out, filename, params);
388 break;
389 case DATATYPE_FLOAT32:
390 readBinaryGridZippedImpl<float>(out, filename, params);
391 break;
392 case DATATYPE_FLOAT64:
393 readBinaryGridZippedImpl<double>(out, filename, params);
394 break;
395 default:
396 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
397 }
398 }
399 #endif
400
401 void Brick::readBinaryGrid(escript::Data& out, string filename,
402 const ReaderParameters& params) const
403 {
404 // the mapping is not universally correct but should work on our
405 // supported platforms
406 switch (params.dataType) {
407 case DATATYPE_INT32:
408 readBinaryGridImpl<int>(out, filename, params);
409 break;
410 case DATATYPE_FLOAT32:
411 readBinaryGridImpl<float>(out, filename, params);
412 break;
413 case DATATYPE_FLOAT64:
414 readBinaryGridImpl<double>(out, filename, params);
415 break;
416 default:
417 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
418 }
419 }
420
421 template<typename ValueType>
422 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
423 const ReaderParameters& params) const
424 {
425 // check destination function space
426 int myN0, myN1, myN2;
427 if (out.getFunctionSpace().getTypeCode() == Nodes) {
428 myN0 = m_NN[0];
429 myN1 = m_NN[1];
430 myN2 = m_NN[2];
431 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
432 out.getFunctionSpace().getTypeCode() == ReducedElements) {
433 myN0 = m_NE[0];
434 myN1 = m_NE[1];
435 myN2 = m_NE[2];
436 } else
437 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
438
439 if (params.first.size() != 3)
440 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
441
442 if (params.numValues.size() != 3)
443 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
444
445 if (params.multiplier.size() != 3)
446 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
447 for (size_t i=0; i<params.multiplier.size(); i++)
448 if (params.multiplier[i]<1)
449 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
450
451 // check file existence and size
452 ifstream f(filename.c_str(), ifstream::binary);
453 if (f.fail()) {
454 throw RipleyException("readBinaryGrid(): cannot open file");
455 }
456 f.seekg(0, ios::end);
457 const int numComp = out.getDataPointSize();
458 const int filesize = f.tellg();
459 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
460 if (filesize < reqsize) {
461 f.close();
462 throw RipleyException("readBinaryGrid(): not enough data in file");
463 }
464
465 // check if this rank contributes anything
466 if (params.first[0] >= m_offset[0]+myN0 ||
467 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
468 params.first[1] >= m_offset[1]+myN1 ||
469 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
470 params.first[2] >= m_offset[2]+myN2 ||
471 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
472 f.close();
473 return;
474 }
475
476 // now determine how much this rank has to write
477
478 // first coordinates in data object to write to
479 const int first0 = max(0, params.first[0]-m_offset[0]);
480 const int first1 = max(0, params.first[1]-m_offset[1]);
481 const int first2 = max(0, params.first[2]-m_offset[2]);
482 // indices to first value in file
483 const int idx0 = max(0, m_offset[0]-params.first[0]);
484 const int idx1 = max(0, m_offset[1]-params.first[1]);
485 const int idx2 = max(0, m_offset[2]-params.first[2]);
486 // number of values to read
487 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
488 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
489 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
490
491 out.requireWrite();
492 vector<ValueType> values(num0*numComp);
493 const int dpp = out.getNumDataPointsPerSample();
494
495 for (int z=0; z<num2; z++) {
496 for (int y=0; y<num1; y++) {
497 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
498 +(idx2+z)*params.numValues[0]*params.numValues[1]);
499 f.seekg(fileofs*sizeof(ValueType));
500 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
501
502 for (int x=0; x<num0; x++) {
503 const int baseIndex = first0+x*params.multiplier[0]
504 +(first1+y*params.multiplier[1])*myN0
505 +(first2+z*params.multiplier[2])*myN0*myN1;
506 for (int m2=0; m2<params.multiplier[2]; m2++) {
507 for (int m1=0; m1<params.multiplier[1]; m1++) {
508 for (int m0=0; m0<params.multiplier[0]; m0++) {
509 const int dataIndex = baseIndex+m0
510 +m1*myN0
511 +m2*myN0*myN1;
512 double* dest = out.getSampleDataRW(dataIndex);
513 for (int c=0; c<numComp; c++) {
514 ValueType val = values[x*numComp+c];
515
516 if (params.byteOrder != BYTEORDER_NATIVE) {
517 char* cval = reinterpret_cast<char*>(&val);
518 // this will alter val!!
519 byte_swap32(cval);
520 }
521 if (!std::isnan(val)) {
522 for (int q=0; q<dpp; q++) {
523 *dest++ = static_cast<double>(val);
524 }
525 }
526 }
527 }
528 }
529 }
530 }
531 }
532 }
533
534 f.close();
535 }
536
537 #ifdef USE_BOOSTIO
538 template<typename ValueType>
539 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
540 const ReaderParameters& params) const
541 {
542 // check destination function space
543 int myN0, myN1, myN2;
544 if (out.getFunctionSpace().getTypeCode() == Nodes) {
545 myN0 = m_NN[0];
546 myN1 = m_NN[1];
547 myN2 = m_NN[2];
548 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
549 out.getFunctionSpace().getTypeCode() == ReducedElements) {
550 myN0 = m_NE[0];
551 myN1 = m_NE[1];
552 myN2 = m_NE[2];
553 } else
554 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
555
556 if (params.first.size() != 3)
557 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
558
559 if (params.numValues.size() != 3)
560 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
561
562 if (params.multiplier.size() != 3)
563 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
564 for (size_t i=0; i<params.multiplier.size(); i++)
565 if (params.multiplier[i]<1)
566 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
567
568 // check file existence and size
569 ifstream f(filename.c_str(), ifstream::binary);
570 if (f.fail()) {
571 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
572 }
573 f.seekg(0, ios::end);
574 const int numComp = out.getDataPointSize();
575 int filesize = f.tellg();
576 f.seekg(0, ios::beg);
577 std::vector<char> compressed(filesize);
578 f.read((char*)&compressed[0], filesize);
579 f.close();
580 std::vector<char> decompressed = unzip(compressed);
581 filesize = decompressed.size();
582 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
583 if (filesize < reqsize) {
584 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
585 }
586
587 // check if this rank contributes anything
588 if (params.first[0] >= m_offset[0]+myN0 ||
589 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
590 params.first[1] >= m_offset[1]+myN1 ||
591 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
592 params.first[2] >= m_offset[2]+myN2 ||
593 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
594 return;
595 }
596
597 // now determine how much this rank has to write
598
599 // first coordinates in data object to write to
600 const int first0 = max(0, params.first[0]-m_offset[0]);
601 const int first1 = max(0, params.first[1]-m_offset[1]);
602 const int first2 = max(0, params.first[2]-m_offset[2]);
603 // indices to first value in file
604 const int idx0 = max(0, m_offset[0]-params.first[0]);
605 const int idx1 = max(0, m_offset[1]-params.first[1]);
606 const int idx2 = max(0, m_offset[2]-params.first[2]);
607 // number of values to read
608 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
609 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
610 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
611
612 out.requireWrite();
613 vector<ValueType> values(num0*numComp);
614 const int dpp = out.getNumDataPointsPerSample();
615
616 for (int z=0; z<num2; z++) {
617 for (int y=0; y<num1; y++) {
618 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
619 +(idx2+z)*params.numValues[0]*params.numValues[1]);
620 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
621
622 for (int x=0; x<num0; x++) {
623 const int baseIndex = first0+x*params.multiplier[0]
624 +(first1+y*params.multiplier[1])*myN0
625 +(first2+z*params.multiplier[2])*myN0*myN1;
626 for (int m2=0; m2<params.multiplier[2]; m2++) {
627 for (int m1=0; m1<params.multiplier[1]; m1++) {
628 for (int m0=0; m0<params.multiplier[0]; m0++) {
629 const int dataIndex = baseIndex+m0
630 +m1*myN0
631 +m2*myN0*myN1;
632 double* dest = out.getSampleDataRW(dataIndex);
633 for (int c=0; c<numComp; c++) {
634 ValueType val = values[x*numComp+c];
635
636 if (params.byteOrder != BYTEORDER_NATIVE) {
637 char* cval = reinterpret_cast<char*>(&val);
638 // this will alter val!!
639 byte_swap32(cval);
640 }
641 if (!std::isnan(val)) {
642 for (int q=0; q<dpp; q++) {
643 *dest++ = static_cast<double>(val);
644 }
645 }
646 }
647 }
648 }
649 }
650 }
651 }
652 }
653 }
654 #endif
655
656 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
657 int byteOrder, int dataType) const
658 {
659 // the mapping is not universally correct but should work on our
660 // supported platforms
661 switch (dataType) {
662 case DATATYPE_INT32:
663 writeBinaryGridImpl<int>(in, filename, byteOrder);
664 break;
665 case DATATYPE_FLOAT32:
666 writeBinaryGridImpl<float>(in, filename, byteOrder);
667 break;
668 case DATATYPE_FLOAT64:
669 writeBinaryGridImpl<double>(in, filename, byteOrder);
670 break;
671 default:
672 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
673 }
674 }
675
676 template<typename ValueType>
677 void Brick::writeBinaryGridImpl(const escript::Data& in,
678 const string& filename, int byteOrder) const
679 {
680 // check function space and determine number of points
681 int myN0, myN1, myN2;
682 int totalN0, totalN1, totalN2;
683 if (in.getFunctionSpace().getTypeCode() == Nodes) {
684 myN0 = m_NN[0];
685 myN1 = m_NN[1];
686 myN2 = m_NN[2];
687 totalN0 = m_gNE[0]+1;
688 totalN1 = m_gNE[1]+1;
689 totalN2 = m_gNE[2]+1;
690 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
691 in.getFunctionSpace().getTypeCode() == ReducedElements) {
692 myN0 = m_NE[0];
693 myN1 = m_NE[1];
694 myN2 = m_NE[2];
695 totalN0 = m_gNE[0];
696 totalN1 = m_gNE[1];
697 totalN2 = m_gNE[2];
698 } else
699 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
700
701 const int numComp = in.getDataPointSize();
702 const int dpp = in.getNumDataPointsPerSample();
703 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
704
705 if (numComp > 1 || dpp > 1)
706 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
707
708 // from here on we know that each sample consists of one value
709 FileWriter fw;
710 fw.openFile(filename, fileSize);
711 MPIBarrier();
712
713 for (index_t z=0; z<myN2; z++) {
714 for (index_t y=0; y<myN1; y++) {
715 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
716 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
717 ostringstream oss;
718
719 for (index_t x=0; x<myN0; x++) {
720 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
721 ValueType fvalue = static_cast<ValueType>(*sample);
722 if (byteOrder == BYTEORDER_NATIVE) {
723 oss.write((char*)&fvalue, sizeof(fvalue));
724 } else {
725 char* value = reinterpret_cast<char*>(&fvalue);
726 oss.write(byte_swap32(value), sizeof(fvalue));
727 }
728 }
729 fw.writeAt(oss, fileofs);
730 }
731 }
732 fw.close();
733 }
734
735 void Brick::dump(const string& fileName) const
736 {
737 #if USE_SILO
738 string fn(fileName);
739 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
740 fn+=".silo";
741 }
742
743 int driver=DB_HDF5;
744 string siloPath;
745 DBfile* dbfile = NULL;
746
747 #ifdef ESYS_MPI
748 PMPIO_baton_t* baton = NULL;
749 const int NUM_SILO_FILES = 1;
750 const char* blockDirFmt = "/block%04d";
751 #endif
752
753 if (m_mpiInfo->size > 1) {
754 #ifdef ESYS_MPI
755 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
756 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
757 PMPIO_DefaultClose, (void*)&driver);
758 // try the fallback driver in case of error
759 if (!baton && driver != DB_PDB) {
760 driver = DB_PDB;
761 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
762 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
763 PMPIO_DefaultClose, (void*)&driver);
764 }
765 if (baton) {
766 char str[64];
767 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
768 siloPath = str;
769 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
770 }
771 #endif
772 } else {
773 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
774 getDescription().c_str(), driver);
775 // try the fallback driver in case of error
776 if (!dbfile && driver != DB_PDB) {
777 driver = DB_PDB;
778 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
779 getDescription().c_str(), driver);
780 }
781 }
782
783 if (!dbfile)
784 throw RipleyException("dump: Could not create Silo file");
785
786 /*
787 if (driver==DB_HDF5) {
788 // gzip level 1 already provides good compression with minimal
789 // performance penalty. Some tests showed that gzip levels >3 performed
790 // rather badly on escript data both in terms of time and space
791 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
792 }
793 */
794
795 boost::scoped_ptr<double> x(new double[m_NN[0]]);
796 boost::scoped_ptr<double> y(new double[m_NN[1]]);
797 boost::scoped_ptr<double> z(new double[m_NN[2]]);
798 double* coords[3] = { x.get(), y.get(), z.get() };
799 #pragma omp parallel
800 {
801 #pragma omp for
802 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
803 coords[0][i0]=getLocalCoordinate(i0, 0);
804 }
805 #pragma omp for
806 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
807 coords[1][i1]=getLocalCoordinate(i1, 1);
808 }
809 #pragma omp for
810 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
811 coords[2][i2]=getLocalCoordinate(i2, 2);
812 }
813 }
814 int* dims = const_cast<int*>(getNumNodesPerDim());
815
816 // write mesh
817 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
818 DB_COLLINEAR, NULL);
819
820 // write node ids
821 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
822 NULL, 0, DB_INT, DB_NODECENT, NULL);
823
824 // write element ids
825 dims = const_cast<int*>(getNumElementsPerDim());
826 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
827 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
828
829 // rank 0 writes multimesh and multivar
830 if (m_mpiInfo->rank == 0) {
831 vector<string> tempstrings;
832 vector<char*> names;
833 for (dim_t i=0; i<m_mpiInfo->size; i++) {
834 stringstream path;
835 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
836 tempstrings.push_back(path.str());
837 names.push_back((char*)tempstrings.back().c_str());
838 }
839 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
840 DBSetDir(dbfile, "/");
841 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
842 &types[0], NULL);
843 tempstrings.clear();
844 names.clear();
845 for (dim_t i=0; i<m_mpiInfo->size; i++) {
846 stringstream path;
847 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
848 tempstrings.push_back(path.str());
849 names.push_back((char*)tempstrings.back().c_str());
850 }
851 types.assign(m_mpiInfo->size, DB_QUADVAR);
852 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
853 &types[0], NULL);
854 tempstrings.clear();
855 names.clear();
856 for (dim_t i=0; i<m_mpiInfo->size; i++) {
857 stringstream path;
858 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
859 tempstrings.push_back(path.str());
860 names.push_back((char*)tempstrings.back().c_str());
861 }
862 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
863 &types[0], NULL);
864 }
865
866 if (m_mpiInfo->size > 1) {
867 #ifdef ESYS_MPI
868 PMPIO_HandOffBaton(baton, dbfile);
869 PMPIO_Finish(baton);
870 #endif
871 } else {
872 DBClose(dbfile);
873 }
874
875 #else // USE_SILO
876 throw RipleyException("dump: no Silo support");
877 #endif
878 }
879
880 const int* Brick::borrowSampleReferenceIDs(int fsType) const
881 {
882 switch (fsType) {
883 case Nodes:
884 case ReducedNodes: //FIXME: reduced
885 return &m_nodeId[0];
886 case DegreesOfFreedom:
887 case ReducedDegreesOfFreedom: //FIXME: reduced
888 return &m_dofId[0];
889 case Elements:
890 case ReducedElements:
891 return &m_elementId[0];
892 case FaceElements:
893 case ReducedFaceElements:
894 return &m_faceId[0];
895 case Points:
896 return &m_diracPointNodeIDs[0];
897 default:
898 break;
899 }
900
901 stringstream msg;
902 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
903 throw RipleyException(msg.str());
904 }
905
906 bool Brick::ownSample(int fsType, index_t id) const
907 {
908 if (getMPISize()==1)
909 return true;
910
911 switch (fsType) {
912 case Nodes:
913 case ReducedNodes: //FIXME: reduced
914 return (m_dofMap[id] < getNumDOF());
915 case DegreesOfFreedom:
916 case ReducedDegreesOfFreedom:
917 return true;
918 case Elements:
919 case ReducedElements:
920 {
921 // check ownership of element's _last_ node
922 const index_t x=id%m_NE[0] + 1;
923 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
924 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
925 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
926 }
927 case FaceElements:
928 case ReducedFaceElements:
929 {
930 // check ownership of face element's last node
931 dim_t n=0;
932 for (size_t i=0; i<6; i++) {
933 n+=m_faceCount[i];
934 if (id<n) {
935 const index_t j=id-n+m_faceCount[i];
936 if (i>=4) { // front or back
937 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
938 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
939 } else if (i>=2) { // bottom or top
940 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
941 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
942 } else { // left or right
943 const index_t first=(i==0 ? 0 : m_NN[0]-1);
944 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
945 }
946 }
947 }
948 return false;
949 }
950 default:
951 break;
952 }
953
954 stringstream msg;
955 msg << "ownSample: invalid function space type " << fsType;
956 throw RipleyException(msg.str());
957 }
958
959 void Brick::setToNormal(escript::Data& out) const
960 {
961 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
962 out.requireWrite();
963 #pragma omp parallel
964 {
965 if (m_faceOffset[0] > -1) {
966 #pragma omp for nowait
967 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
968 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
969 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
970 // set vector at four quadrature points
971 *o++ = -1.; *o++ = 0.; *o++ = 0.;
972 *o++ = -1.; *o++ = 0.; *o++ = 0.;
973 *o++ = -1.; *o++ = 0.; *o++ = 0.;
974 *o++ = -1.; *o++ = 0.; *o = 0.;
975 }
976 }
977 }
978
979 if (m_faceOffset[1] > -1) {
980 #pragma omp for nowait
981 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
982 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
983 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
984 // set vector at four quadrature points
985 *o++ = 1.; *o++ = 0.; *o++ = 0.;
986 *o++ = 1.; *o++ = 0.; *o++ = 0.;
987 *o++ = 1.; *o++ = 0.; *o++ = 0.;
988 *o++ = 1.; *o++ = 0.; *o = 0.;
989 }
990 }
991 }
992
993 if (m_faceOffset[2] > -1) {
994 #pragma omp for nowait
995 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
996 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
997 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
998 // set vector at four quadrature points
999 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1000 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1001 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1002 *o++ = 0.; *o++ = -1.; *o = 0.;
1003 }
1004 }
1005 }
1006
1007 if (m_faceOffset[3] > -1) {
1008 #pragma omp for nowait
1009 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1010 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1011 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1012 // set vector at four quadrature points
1013 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1014 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1015 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1016 *o++ = 0.; *o++ = 1.; *o = 0.;
1017 }
1018 }
1019 }
1020
1021 if (m_faceOffset[4] > -1) {
1022 #pragma omp for nowait
1023 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1024 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1025 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1026 // set vector at four quadrature points
1027 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1028 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1029 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1030 *o++ = 0.; *o++ = 0.; *o = -1.;
1031 }
1032 }
1033 }
1034
1035 if (m_faceOffset[5] > -1) {
1036 #pragma omp for nowait
1037 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1038 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1039 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1040 // set vector at four quadrature points
1041 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1042 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1043 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1044 *o++ = 0.; *o++ = 0.; *o = 1.;
1045 }
1046 }
1047 }
1048 } // end of parallel section
1049 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1050 out.requireWrite();
1051 #pragma omp parallel
1052 {
1053 if (m_faceOffset[0] > -1) {
1054 #pragma omp for nowait
1055 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1056 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1057 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1058 *o++ = -1.;
1059 *o++ = 0.;
1060 *o = 0.;
1061 }
1062 }
1063 }
1064
1065 if (m_faceOffset[1] > -1) {
1066 #pragma omp for nowait
1067 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1068 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1069 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1070 *o++ = 1.;
1071 *o++ = 0.;
1072 *o = 0.;
1073 }
1074 }
1075 }
1076
1077 if (m_faceOffset[2] > -1) {
1078 #pragma omp for nowait
1079 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1080 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1081 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1082 *o++ = 0.;
1083 *o++ = -1.;
1084 *o = 0.;
1085 }
1086 }
1087 }
1088
1089 if (m_faceOffset[3] > -1) {
1090 #pragma omp for nowait
1091 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1092 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1093 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1094 *o++ = 0.;
1095 *o++ = 1.;
1096 *o = 0.;
1097 }
1098 }
1099 }
1100
1101 if (m_faceOffset[4] > -1) {
1102 #pragma omp for nowait
1103 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1104 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1105 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1106 *o++ = 0.;
1107 *o++ = 0.;
1108 *o = -1.;
1109 }
1110 }
1111 }
1112
1113 if (m_faceOffset[5] > -1) {
1114 #pragma omp for nowait
1115 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1116 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1117 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1118 *o++ = 0.;
1119 *o++ = 0.;
1120 *o = 1.;
1121 }
1122 }
1123 }
1124 } // end of parallel section
1125
1126 } else {
1127 stringstream msg;
1128 msg << "setToNormal: invalid function space type "
1129 << out.getFunctionSpace().getTypeCode();
1130 throw RipleyException(msg.str());
1131 }
1132 }
1133
1134 void Brick::setToSize(escript::Data& out) const
1135 {
1136 if (out.getFunctionSpace().getTypeCode() == Elements
1137 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1138 out.requireWrite();
1139 const dim_t numQuad=out.getNumDataPointsPerSample();
1140 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1141 #pragma omp parallel for
1142 for (index_t k = 0; k < getNumElements(); ++k) {
1143 double* o = out.getSampleDataRW(k);
1144 fill(o, o+numQuad, size);
1145 }
1146 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1147 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1148 out.requireWrite();
1149 const dim_t numQuad=out.getNumDataPointsPerSample();
1150 #pragma omp parallel
1151 {
1152 if (m_faceOffset[0] > -1) {
1153 const double size=min(m_dx[1],m_dx[2]);
1154 #pragma omp for nowait
1155 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1156 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1157 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1158 fill(o, o+numQuad, size);
1159 }
1160 }
1161 }
1162
1163 if (m_faceOffset[1] > -1) {
1164 const double size=min(m_dx[1],m_dx[2]);
1165 #pragma omp for nowait
1166 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1167 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1168 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1169 fill(o, o+numQuad, size);
1170 }
1171 }
1172 }
1173
1174 if (m_faceOffset[2] > -1) {
1175 const double size=min(m_dx[0],m_dx[2]);
1176 #pragma omp for nowait
1177 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1178 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1179 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1180 fill(o, o+numQuad, size);
1181 }
1182 }
1183 }
1184
1185 if (m_faceOffset[3] > -1) {
1186 const double size=min(m_dx[0],m_dx[2]);
1187 #pragma omp for nowait
1188 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1189 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1190 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1191 fill(o, o+numQuad, size);
1192 }
1193 }
1194 }
1195
1196 if (m_faceOffset[4] > -1) {
1197 const double size=min(m_dx[0],m_dx[1]);
1198 #pragma omp for nowait
1199 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1200 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1201 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1202 fill(o, o+numQuad, size);
1203 }
1204 }
1205 }
1206
1207 if (m_faceOffset[5] > -1) {
1208 const double size=min(m_dx[0],m_dx[1]);
1209 #pragma omp for nowait
1210 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1211 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1212 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1213 fill(o, o+numQuad, size);
1214 }
1215 }
1216 }
1217 } // end of parallel section
1218
1219 } else {
1220 stringstream msg;
1221 msg << "setToSize: invalid function space type "
1222 << out.getFunctionSpace().getTypeCode();
1223 throw RipleyException(msg.str());
1224 }
1225 }
1226
1227 void Brick::Print_Mesh_Info(const bool full) const
1228 {
1229 RipleyDomain::Print_Mesh_Info(full);
1230 if (full) {
1231 cout << " Id Coordinates" << endl;
1232 cout.precision(15);
1233 cout.setf(ios::scientific, ios::floatfield);
1234 for (index_t i=0; i < getNumNodes(); i++) {
1235 cout << " " << setw(5) << m_nodeId[i]
1236 << " " << getLocalCoordinate(i%m_NN[0], 0)
1237 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1238 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1239 }
1240 }
1241 }
1242
1243
1244 //protected
1245 void Brick::assembleCoordinates(escript::Data& arg) const
1246 {
1247 escriptDataC x = arg.getDataC();
1248 int numDim = m_numDim;
1249 if (!isDataPointShapeEqual(&x, 1, &numDim))
1250 throw RipleyException("setToX: Invalid Data object shape");
1251 if (!numSamplesEqual(&x, 1, getNumNodes()))
1252 throw RipleyException("setToX: Illegal number of samples in Data object");
1253
1254 arg.requireWrite();
1255 #pragma omp parallel for
1256 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1257 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1258 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1259 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1260 point[0] = getLocalCoordinate(i0, 0);
1261 point[1] = getLocalCoordinate(i1, 1);
1262 point[2] = getLocalCoordinate(i2, 2);
1263 }
1264 }
1265 }
1266 }
1267
1268 //protected
1269 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1270 {
1271 const dim_t numComp = in.getDataPointSize();
1272 const double C0 = .044658198738520451079;
1273 const double C1 = .16666666666666666667;
1274 const double C2 = .21132486540518711775;
1275 const double C3 = .25;
1276 const double C4 = .5;
1277 const double C5 = .62200846792814621559;
1278 const double C6 = .78867513459481288225;
1279
1280 if (out.getFunctionSpace().getTypeCode() == Elements) {
1281 out.requireWrite();
1282 #pragma omp parallel
1283 {
1284 vector<double> f_000(numComp);
1285 vector<double> f_001(numComp);
1286 vector<double> f_010(numComp);
1287 vector<double> f_011(numComp);
1288 vector<double> f_100(numComp);
1289 vector<double> f_101(numComp);
1290 vector<double> f_110(numComp);
1291 vector<double> f_111(numComp);
1292 #pragma omp for
1293 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1294 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1295 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1296 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1302 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1303 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1304 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1305 for (index_t i=0; i < numComp; ++i) {
1306 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1308 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1309 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1310 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1311 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1312 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1313 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1314 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1315 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1316 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1317 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1318 o[INDEX3(i,0,0,numComp,3)] = V0;
1319 o[INDEX3(i,1,0,numComp,3)] = V4;
1320 o[INDEX3(i,2,0,numComp,3)] = V8;
1321 o[INDEX3(i,0,1,numComp,3)] = V0;
1322 o[INDEX3(i,1,1,numComp,3)] = V5;
1323 o[INDEX3(i,2,1,numComp,3)] = V9;
1324 o[INDEX3(i,0,2,numComp,3)] = V1;
1325 o[INDEX3(i,1,2,numComp,3)] = V4;
1326 o[INDEX3(i,2,2,numComp,3)] = V10;
1327 o[INDEX3(i,0,3,numComp,3)] = V1;
1328 o[INDEX3(i,1,3,numComp,3)] = V5;
1329 o[INDEX3(i,2,3,numComp,3)] = V11;
1330 o[INDEX3(i,0,4,numComp,3)] = V2;
1331 o[INDEX3(i,1,4,numComp,3)] = V6;
1332 o[INDEX3(i,2,4,numComp,3)] = V8;
1333 o[INDEX3(i,0,5,numComp,3)] = V2;
1334 o[INDEX3(i,1,5,numComp,3)] = V7;
1335 o[INDEX3(i,2,5,numComp,3)] = V9;
1336 o[INDEX3(i,0,6,numComp,3)] = V3;
1337 o[INDEX3(i,1,6,numComp,3)] = V6;
1338 o[INDEX3(i,2,6,numComp,3)] = V10;
1339 o[INDEX3(i,0,7,numComp,3)] = V3;
1340 o[INDEX3(i,1,7,numComp,3)] = V7;
1341 o[INDEX3(i,2,7,numComp,3)] = V11;
1342 } // end of component loop i
1343 } // end of k0 loop
1344 } // end of k1 loop
1345 } // end of k2 loop
1346 } // end of parallel section
1347 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1348 out.requireWrite();
1349 #pragma omp parallel
1350 {
1351 vector<double> f_000(numComp);
1352 vector<double> f_001(numComp);
1353 vector<double> f_010(numComp);
1354 vector<double> f_011(numComp);
1355 vector<double> f_100(numComp);
1356 vector<double> f_101(numComp);
1357 vector<double> f_110(numComp);
1358 vector<double> f_111(numComp);
1359 #pragma omp for
1360 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1361 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1362 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1363 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1369 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1370 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1372 for (index_t i=0; i < numComp; ++i) {
1373 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1374 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1375 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1376 } // end of component loop i
1377 } // end of k0 loop
1378 } // end of k1 loop
1379 } // end of k2 loop
1380 } // end of parallel section
1381 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1382 out.requireWrite();
1383 #pragma omp parallel
1384 {
1385 vector<double> f_000(numComp);
1386 vector<double> f_001(numComp);
1387 vector<double> f_010(numComp);
1388 vector<double> f_011(numComp);
1389 vector<double> f_100(numComp);
1390 vector<double> f_101(numComp);
1391 vector<double> f_110(numComp);
1392 vector<double> f_111(numComp);
1393 if (m_faceOffset[0] > -1) {
1394 #pragma omp for nowait
1395 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1396 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1397 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1403 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1404 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1405 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1406 for (index_t i=0; i < numComp; ++i) {
1407 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1408 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1409 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1410 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1411 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1412 o[INDEX3(i,1,0,numComp,3)] = V0;
1413 o[INDEX3(i,2,0,numComp,3)] = V2;
1414 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1415 o[INDEX3(i,1,1,numComp,3)] = V0;
1416 o[INDEX3(i,2,1,numComp,3)] = V3;
1417 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1418 o[INDEX3(i,1,2,numComp,3)] = V1;
1419 o[INDEX3(i,2,2,numComp,3)] = V2;
1420 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1421 o[INDEX3(i,1,3,numComp,3)] = V1;
1422 o[INDEX3(i,2,3,numComp,3)] = V3;
1423 } // end of component loop i
1424 } // end of k1 loop
1425 } // end of k2 loop
1426 } // end of face 0
1427 if (m_faceOffset[1] > -1) {
1428 #pragma omp for nowait
1429 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1430 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1431 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1438 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1440 for (index_t i=0; i < numComp; ++i) {
1441 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1442 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1443 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1444 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1445 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1446 o[INDEX3(i,1,0,numComp,3)] = V0;
1447 o[INDEX3(i,2,0,numComp,3)] = V2;
1448 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1449 o[INDEX3(i,1,1,numComp,3)] = V0;
1450 o[INDEX3(i,2,1,numComp,3)] = V3;
1451 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1452 o[INDEX3(i,1,2,numComp,3)] = V1;
1453 o[INDEX3(i,2,2,numComp,3)] = V2;
1454 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1455 o[INDEX3(i,1,3,numComp,3)] = V1;
1456 o[INDEX3(i,2,3,numComp,3)] = V3;
1457 } // end of component loop i
1458 } // end of k1 loop
1459 } // end of k2 loop
1460 } // end of face 1
1461 if (m_faceOffset[2] > -1) {
1462 #pragma omp for nowait
1463 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1464 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1465 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1474 for (index_t i=0; i < numComp; ++i) {
1475 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1476 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1477 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1478 o[INDEX3(i,0,0,numComp,3)] = V0;
1479 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1480 o[INDEX3(i,2,0,numComp,3)] = V1;
1481 o[INDEX3(i,0,1,numComp,3)] = V0;
1482 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1483 o[INDEX3(i,2,1,numComp,3)] = V2;
1484 o[INDEX3(i,0,2,numComp,3)] = V0;
1485 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1486 o[INDEX3(i,2,2,numComp,3)] = V1;
1487 o[INDEX3(i,0,3,numComp,3)] = V0;
1488 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1489 o[INDEX3(i,2,3,numComp,3)] = V2;
1490 } // end of component loop i
1491 } // end of k0 loop
1492 } // end of k2 loop
1493 } // end of face 2
1494 if (m_faceOffset[3] > -1) {
1495 #pragma omp for nowait
1496 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1497 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1498 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1507 for (index_t i=0; i < numComp; ++i) {
1508 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1509 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1510 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1511 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1512 o[INDEX3(i,0,0,numComp,3)] = V0;
1513 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1514 o[INDEX3(i,2,0,numComp,3)] = V2;
1515 o[INDEX3(i,0,1,numComp,3)] = V0;
1516 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1517 o[INDEX3(i,2,1,numComp,3)] = V3;
1518 o[INDEX3(i,0,2,numComp,3)] = V1;
1519 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1520 o[INDEX3(i,2,2,numComp,3)] = V2;
1521 o[INDEX3(i,0,3,numComp,3)] = V1;
1522 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1523 o[INDEX3(i,2,3,numComp,3)] = V3;
1524 } // end of component loop i
1525 } // end of k0 loop
1526 } // end of k2 loop
1527 } // end of face 3
1528 if (m_faceOffset[4] > -1) {
1529 #pragma omp for nowait
1530 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1531 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1532 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1538 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1541 for (index_t i=0; i < numComp; ++i) {
1542 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1543 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1544 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1545 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1546 o[INDEX3(i,0,0,numComp,3)] = V0;
1547 o[INDEX3(i,1,0,numComp,3)] = V2;
1548 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1549 o[INDEX3(i,0,1,numComp,3)] = V0;
1550 o[INDEX3(i,1,1,numComp,3)] = V3;
1551 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1552 o[INDEX3(i,0,2,numComp,3)] = V1;
1553 o[INDEX3(i,1,2,numComp,3)] = V2;
1554 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1555 o[INDEX3(i,0,3,numComp,3)] = V1;
1556 o[INDEX3(i,1,3,numComp,3)] = V3;
1557 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1558 } // end of component loop i
1559 } // end of k0 loop
1560 } // end of k1 loop
1561 } // end of face 4
1562 if (m_faceOffset[5] > -1) {
1563 #pragma omp for nowait
1564 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1565 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1566 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1575 for (index_t i=0; i < numComp; ++i) {
1576 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1577 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1578 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1579 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1580 o[INDEX3(i,0,0,numComp,3)] = V0;
1581 o[INDEX3(i,1,0,numComp,3)] = V2;
1582 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1583 o[INDEX3(i,0,1,numComp,3)] = V0;
1584 o[INDEX3(i,1,1,numComp,3)] = V3;
1585 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1586 o[INDEX3(i,0,2,numComp,3)] = V1;
1587 o[INDEX3(i,1,2,numComp,3)] = V2;
1588 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1589 o[INDEX3(i,0,3,numComp,3)] = V1;
1590 o[INDEX3(i,1,3,numComp,3)] = V3;
1591 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1592 } // end of component loop i
1593 } // end of k0 loop
1594 } // end of k1 loop
1595 } // end of face 5
1596 } // end of parallel section
1597 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1598 out.requireWrite();
1599 #pragma omp parallel
1600 {
1601 vector<double> f_000(numComp);
1602 vector<double> f_001(numComp);
1603 vector<double> f_010(numComp);
1604 vector<double> f_011(numComp);
1605 vector<double> f_100(numComp);
1606 vector<double> f_101(numComp);
1607 vector<double> f_110(numComp);
1608 vector<double> f_111(numComp);
1609 if (m_faceOffset[0] > -1) {
1610 #pragma omp for nowait
1611 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1612 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1613 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1620 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1621 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1622 for (index_t i=0; i < numComp; ++i) {
1623 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1624 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1625 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1626 } // end of component loop i
1627 } // end of k1 loop
1628 } // end of k2 loop
1629 } // end of face 0
1630 if (m_faceOffset[1] > -1) {
1631 #pragma omp for nowait
1632 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1633 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1634 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1635 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1636 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1637 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1638 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1639 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1640 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1641 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1642 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1643 for (index_t i=0; i < numComp; ++i) {
1644 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1645 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1646 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1647 } // end of component loop i
1648 } // end of k1 loop
1649 } // end of k2 loop
1650 } // end of face 1
1651 if (m_faceOffset[2] > -1) {
1652 #pragma omp for nowait
1653 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1654 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1655 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1656 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1657 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1658 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1659 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1660 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1661 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1662 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1663 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1664 for (index_t i=0; i < numComp; ++i) {
1665 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1666 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1667 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1668 } // end of component loop i
1669 } // end of k0 loop
1670 } // end of k2 loop
1671 } // end of face 2
1672 if (m_faceOffset[3] > -1) {
1673 #pragma omp for nowait
1674 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1675 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1676 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1677 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1678 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1679 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1680 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1681 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1682 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1683 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1684 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1685 for (index_t i=0; i < numComp; ++i) {
1686 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1687 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1688 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1689 } // end of component loop i
1690 } // end of k0 loop
1691 } // end of k2 loop
1692 } // end of face 3
1693 if (m_faceOffset[4] > -1) {
1694 #pragma omp for nowait
1695 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1696 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1697 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1698 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1699 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1700 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1701 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1702 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1703 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1704 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1705 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1706 for (index_t i=0; i < numComp; ++i) {
1707 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1708 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1709 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1710 } // end of component loop i
1711 } // end of k0 loop
1712 } // end of k1 loop
1713 } // end of face 4
1714 if (m_faceOffset[5] > -1) {
1715 #pragma omp for nowait
1716 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1717 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1718 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1721 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1722 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1723 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1724 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1725 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1726 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1727 for (index_t i=0; i < numComp; ++i) {
1728 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1729 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1730 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1731 } // end of component loop i
1732 } // end of k0 loop
1733 } // end of k1 loop
1734 } // end of face 5
1735 } // end of parallel section
1736 }
1737 }
1738
1739 //protected
1740 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1741 {
1742 const dim_t numComp = arg.getDataPointSize();
1743 const index_t left = (m_offset[0]==0 ? 0 : 1);
1744 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1745 const index_t front = (m_offset[2]==0 ? 0 : 1);
1746 const int fs = arg.getFunctionSpace().getTypeCode();
1747 if (fs == Elements && arg.actsExpanded()) {
1748 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1749 #pragma omp parallel
1750 {
1751 vector<double> int_local(numComp, 0);
1752 #pragma omp for nowait
1753 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1754 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1755 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1756 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1757 for (index_t i=0; i < numComp; ++i) {
1758 const double f_0 = f[INDEX2(i,0,numComp)];
1759 const double f_1 = f[INDEX2(i,1,numComp)];
1760 const double f_2 = f[INDEX2(i,2,numComp)];
1761 const double f_3 = f[INDEX2(i,3,numComp)];
1762 const double f_4 = f[INDEX2(i,4,numComp)];
1763 const double f_5 = f[INDEX2(i,5,numComp)];
1764 const double f_6 = f[INDEX2(i,6,numComp)];
1765 const double f_7 = f[INDEX2(i,7,numComp)];
1766 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1767 } // end of component loop i
1768 } // end of k0 loop
1769 } // end of k1 loop
1770 } // end of k2 loop
1771
1772 #pragma omp critical
1773 for (index_t i=0; i<numComp; i++)
1774 integrals[i]+=int_local[i];
1775 } // end of parallel section
1776
1777 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1778 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1779 #pragma omp parallel
1780 {
1781 vector<double> int_local(numComp, 0);
1782 #pragma omp for nowait
1783 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1784 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1785 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1786 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1787 for (index_t i=0; i < numComp; ++i) {
1788 int_local[i]+=f[i]*w_0;
1789 } // end of component loop i
1790 } // end of k0 loop
1791 } // end of k1 loop
1792 } // end of k2 loop
1793
1794 #pragma omp critical
1795 for (index_t i=0; i<numComp; i++)
1796 integrals[i]+=int_local[i];
1797 } // end of parallel section
1798
1799 } else if (fs == FaceElements && arg.actsExpanded()) {
1800 const double w_0 = m_dx[1]*m_dx[2]/4.;
1801 const double w_1 = m_dx[0]*m_dx[2]/4.;
1802 const double w_2 = m_dx[0]*m_dx[1]/4.;
1803 #pragma omp parallel
1804 {
1805 vector<double> int_local(numComp, 0);
1806 if (m_faceOffset[0] > -1) {
1807 #pragma omp for nowait
1808 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1809 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1810 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1811 for (index_t i=0; i < numComp; ++i) {
1812 const double f_0 = f[INDEX2(i,0,numComp)];
1813 const double f_1 = f[INDEX2(i,1,numComp)];
1814 const double f_2 = f[INDEX2(i,2,numComp)];
1815 const double f_3 = f[INDEX2(i,3,numComp)];
1816 int_local