/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4817 - (show annotations)
Fri Mar 28 08:04:09 2014 UTC (4 years, 11 months ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 165607 byte(s)
Coupler/Connector shared ptrs.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <ripley/LameAssembler3D.h>
23 #include <ripley/domainhelpers.h>
24 #include <boost/scoped_array.hpp>
25
26 #ifdef USE_NETCDF
27 #include <netcdfcpp.h>
28 #endif
29
30 #if USE_SILO
31 #include <silo.h>
32 #ifdef ESYS_MPI
33 #include <pmpio.h>
34 #endif
35 #endif
36
37 #include <iomanip>
38
39 #include "esysUtils/EsysRandom.h"
40 #include "blocktools.h"
41
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 int indexOfMax(int a, int b, int c) {
49 if (a > b) {
50 if (c > a) {
51 return 2;
52 }
53 return 0;
54 } else if (b > c) {
55 return 1;
56 }
57 return 2;
58 }
59
60 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 double x1, double y1, double z1, int d0, int d1, int d2,
62 const std::vector<double>& points, const std::vector<int>& tags,
63 const simap_t& tagnamestonums) :
64 RipleyDomain(3)
65 {
66 // ignore subdivision parameters for serial run
67 if (m_mpiInfo->size == 1) {
68 d0=1;
69 d1=1;
70 d2=1;
71 }
72 bool warn=false;
73
74 std::vector<int> factors;
75 int ranks = m_mpiInfo->size;
76 int epr[3] = {n0,n1,n2};
77 int d[3] = {d0,d1,d2};
78 if (d0<=0 || d1<=0 || d2<=0) {
79 for (int i = 0; i < 3; i++) {
80 if (d[i] < 1) {
81 d[i] = 1;
82 continue;
83 }
84 epr[i] = -1; // can no longer be max
85 if (ranks % d[i] != 0) {
86 throw RipleyException("Invalid number of spatial subdivisions");
87 }
88 //remove
89 ranks /= d[i];
90 }
91 factorise(factors, ranks);
92 if (factors.size() != 0) {
93 warn = true;
94 }
95 }
96 while (factors.size() > 0) {
97 int i = indexOfMax(epr[0],epr[1],epr[2]);
98 int f = factors.back();
99 factors.pop_back();
100 d[i] *= f;
101 epr[i] /= f;
102 }
103 d0 = d[0]; d1 = d[1]; d2 = d[2];
104
105 // ensure number of subdivisions is valid and nodes can be distributed
106 // among number of ranks
107 if (d0*d1*d2 != m_mpiInfo->size){
108 throw RipleyException("Invalid number of spatial subdivisions");
109 }
110 if (warn) {
111 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113 }
114
115 double l0 = x1-x0;
116 double l1 = y1-y0;
117 double l2 = z1-z0;
118 m_dx[0] = l0/n0;
119 m_dx[1] = l1/n1;
120 m_dx[2] = l2/n2;
121
122 if ((n0+1)%d0 > 0) {
123 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 l0=m_dx[0]*n0;
125 cout << "Warning: Adjusted number of elements and length. N0="
126 << n0 << ", l0=" << l0 << endl;
127 }
128 if ((n1+1)%d1 > 0) {
129 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 l1=m_dx[1]*n1;
131 cout << "Warning: Adjusted number of elements and length. N1="
132 << n1 << ", l1=" << l1 << endl;
133 }
134 if ((n2+1)%d2 > 0) {
135 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 l2=m_dx[2]*n2;
137 cout << "Warning: Adjusted number of elements and length. N2="
138 << n2 << ", l2=" << l2 << endl;
139 }
140
141 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 throw RipleyException("Too few elements for the number of ranks");
143
144 m_gNE[0] = n0;
145 m_gNE[1] = n1;
146 m_gNE[2] = n2;
147 m_origin[0] = x0;
148 m_origin[1] = y0;
149 m_origin[2] = z0;
150 m_length[0] = l0;
151 m_length[1] = l1;
152 m_length[2] = l2;
153 m_NX[0] = d0;
154 m_NX[1] = d1;
155 m_NX[2] = d2;
156
157 // local number of elements (including overlap)
158 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160 m_NE[0]++;
161 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162 m_ownNE[0]--;
163
164 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166 m_NE[1]++;
167 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168 m_ownNE[1]--;
169
170 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172 m_NE[2]++;
173 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174 m_ownNE[2]--;
175
176 // local number of nodes
177 m_NN[0] = m_NE[0]+1;
178 m_NN[1] = m_NE[1]+1;
179 m_NN[2] = m_NE[2]+1;
180
181 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183 if (m_offset[0] > 0)
184 m_offset[0]--;
185 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186 if (m_offset[1] > 0)
187 m_offset[1]--;
188 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189 if (m_offset[2] > 0)
190 m_offset[2]--;
191
192 populateSampleIds();
193 createPattern();
194
195 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197 i != tagnamestonums.end(); i++) {
198 setTagMap(i->first, i->second);
199 }
200 addPoints(tags.size(), &points[0], &tags[0]);
201 }
202
203
204 Brick::~Brick()
205 {
206 paso::SystemMatrixPattern_free(m_pattern);
207 delete assembler;
208 }
209
210 string Brick::getDescription() const
211 {
212 return "ripley::Brick";
213 }
214
215 bool Brick::operator==(const AbstractDomain& other) const
216 {
217 const Brick* o=dynamic_cast<const Brick*>(&other);
218 if (o) {
219 return (RipleyDomain::operator==(other) &&
220 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
221 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
222 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
223 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
224 }
225
226 return false;
227 }
228
229 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
230 const ReaderParameters& params) const
231 {
232 #ifdef USE_NETCDF
233 // check destination function space
234 int myN0, myN1, myN2;
235 if (out.getFunctionSpace().getTypeCode() == Nodes) {
236 myN0 = m_NN[0];
237 myN1 = m_NN[1];
238 myN2 = m_NN[2];
239 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
240 out.getFunctionSpace().getTypeCode() == ReducedElements) {
241 myN0 = m_NE[0];
242 myN1 = m_NE[1];
243 myN2 = m_NE[2];
244 } else
245 throw RipleyException("readNcGrid(): invalid function space for output data object");
246
247 if (params.first.size() != 3)
248 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
249
250 if (params.numValues.size() != 3)
251 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
252
253 if (params.multiplier.size() != 3)
254 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
255 for (size_t i=0; i<params.multiplier.size(); i++)
256 if (params.multiplier[i]<1)
257 throw RipleyException("readNcGrid(): all multipliers must be positive");
258
259 // check file existence and size
260 NcFile f(filename.c_str(), NcFile::ReadOnly);
261 if (!f.is_valid())
262 throw RipleyException("readNcGrid(): cannot open file");
263
264 NcVar* var = f.get_var(varname.c_str());
265 if (!var)
266 throw RipleyException("readNcGrid(): invalid variable name");
267
268 // TODO: rank>0 data support
269 const int numComp = out.getDataPointSize();
270 if (numComp > 1)
271 throw RipleyException("readNcGrid(): only scalar data supported");
272
273 const int dims = var->num_dims();
274 boost::scoped_array<long> edges(var->edges());
275
276 // is this a slice of the data object (dims!=3)?
277 // note the expected ordering of edges (as in numpy: z,y,x)
278 if ( (dims==3 && (params.numValues[2] > edges[0] ||
279 params.numValues[1] > edges[1] ||
280 params.numValues[0] > edges[2]))
281 || (dims==2 && params.numValues[2]>1)
282 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
283 throw RipleyException("readNcGrid(): not enough data in file");
284 }
285
286 // check if this rank contributes anything
287 if (params.first[0] >= m_offset[0]+myN0 ||
288 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
289 params.first[1] >= m_offset[1]+myN1 ||
290 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
291 params.first[2] >= m_offset[2]+myN2 ||
292 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
293 return;
294 }
295
296 // now determine how much this rank has to write
297
298 // first coordinates in data object to write to
299 const int first0 = max(0, params.first[0]-m_offset[0]);
300 const int first1 = max(0, params.first[1]-m_offset[1]);
301 const int first2 = max(0, params.first[2]-m_offset[2]);
302 // indices to first value in file (not accounting for reverse yet)
303 int idx0 = max(0, m_offset[0]-params.first[0]);
304 int idx1 = max(0, m_offset[1]-params.first[1]);
305 int idx2 = max(0, m_offset[2]-params.first[2]);
306 // number of values to read
307 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
308 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
309 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
310
311 // make sure we read the right block if going backwards through file
312 if (params.reverse[0])
313 idx0 = edges[dims-1]-num0-idx0;
314 if (dims>1 && params.reverse[1])
315 idx1 = edges[dims-2]-num1-idx1;
316 if (dims>2 && params.reverse[2])
317 idx2 = edges[dims-3]-num2-idx2;
318
319
320 vector<double> values(num0*num1*num2);
321 if (dims==3) {
322 var->set_cur(idx2, idx1, idx0);
323 var->get(&values[0], num2, num1, num0);
324 } else if (dims==2) {
325 var->set_cur(idx1, idx0);
326 var->get(&values[0], num1, num0);
327 } else {
328 var->set_cur(idx0);
329 var->get(&values[0], num0);
330 }
331
332 const int dpp = out.getNumDataPointsPerSample();
333 out.requireWrite();
334
335 // helpers for reversing
336 const int x0 = (params.reverse[0] ? num0-1 : 0);
337 const int x_mult = (params.reverse[0] ? -1 : 1);
338 const int y0 = (params.reverse[1] ? num1-1 : 0);
339 const int y_mult = (params.reverse[1] ? -1 : 1);
340 const int z0 = (params.reverse[2] ? num2-1 : 0);
341 const int z_mult = (params.reverse[2] ? -1 : 1);
342
343 for (index_t z=0; z<num2; z++) {
344 for (index_t y=0; y<num1; y++) {
345 #pragma omp parallel for
346 for (index_t x=0; x<num0; x++) {
347 const int baseIndex = first0+x*params.multiplier[0]
348 +(first1+y*params.multiplier[1])*myN0
349 +(first2+z*params.multiplier[2])*myN0*myN1;
350 const int srcIndex=(z0+z_mult*z)*num1*num0
351 +(y0+y_mult*y)*num0
352 +(x0+x_mult*x);
353 if (!isnan(values[srcIndex])) {
354 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
355 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
356 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
357 const int dataIndex = baseIndex+m0
358 +m1*myN0
359 +m2*myN0*myN1;
360 double* dest = out.getSampleDataRW(dataIndex);
361 for (index_t q=0; q<dpp; q++) {
362 *dest++ = values[srcIndex];
363 }
364 }
365 }
366 }
367 }
368 }
369 }
370 }
371 #else
372 throw RipleyException("readNcGrid(): not compiled with netCDF support");
373 #endif
374 }
375
376 #ifdef USE_BOOSTIO
377 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
378 const ReaderParameters& params) const
379 {
380 // the mapping is not universally correct but should work on our
381 // supported platforms
382 switch (params.dataType) {
383 case DATATYPE_INT32:
384 readBinaryGridZippedImpl<int>(out, filename, params);
385 break;
386 case DATATYPE_FLOAT32:
387 readBinaryGridZippedImpl<float>(out, filename, params);
388 break;
389 case DATATYPE_FLOAT64:
390 readBinaryGridZippedImpl<double>(out, filename, params);
391 break;
392 default:
393 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
394 }
395 }
396 #endif
397
398 void Brick::readBinaryGrid(escript::Data& out, string filename,
399 const ReaderParameters& params) const
400 {
401 // the mapping is not universally correct but should work on our
402 // supported platforms
403 switch (params.dataType) {
404 case DATATYPE_INT32:
405 readBinaryGridImpl<int>(out, filename, params);
406 break;
407 case DATATYPE_FLOAT32:
408 readBinaryGridImpl<float>(out, filename, params);
409 break;
410 case DATATYPE_FLOAT64:
411 readBinaryGridImpl<double>(out, filename, params);
412 break;
413 default:
414 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
415 }
416 }
417
418 template<typename ValueType>
419 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
420 const ReaderParameters& params) const
421 {
422 // check destination function space
423 int myN0, myN1, myN2;
424 if (out.getFunctionSpace().getTypeCode() == Nodes) {
425 myN0 = m_NN[0];
426 myN1 = m_NN[1];
427 myN2 = m_NN[2];
428 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
429 out.getFunctionSpace().getTypeCode() == ReducedElements) {
430 myN0 = m_NE[0];
431 myN1 = m_NE[1];
432 myN2 = m_NE[2];
433 } else
434 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
435
436 if (params.first.size() != 3)
437 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
438
439 if (params.numValues.size() != 3)
440 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
441
442 if (params.multiplier.size() != 3)
443 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
444 for (size_t i=0; i<params.multiplier.size(); i++)
445 if (params.multiplier[i]<1)
446 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
447
448 // check file existence and size
449 ifstream f(filename.c_str(), ifstream::binary);
450 if (f.fail()) {
451 throw RipleyException("readBinaryGrid(): cannot open file");
452 }
453 f.seekg(0, ios::end);
454 const int numComp = out.getDataPointSize();
455 const int filesize = f.tellg();
456 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
457 if (filesize < reqsize) {
458 f.close();
459 throw RipleyException("readBinaryGrid(): not enough data in file");
460 }
461
462 // check if this rank contributes anything
463 if (params.first[0] >= m_offset[0]+myN0 ||
464 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
465 params.first[1] >= m_offset[1]+myN1 ||
466 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
467 params.first[2] >= m_offset[2]+myN2 ||
468 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
469 f.close();
470 return;
471 }
472
473 // now determine how much this rank has to write
474
475 // first coordinates in data object to write to
476 const int first0 = max(0, params.first[0]-m_offset[0]);
477 const int first1 = max(0, params.first[1]-m_offset[1]);
478 const int first2 = max(0, params.first[2]-m_offset[2]);
479 // indices to first value in file
480 const int idx0 = max(0, m_offset[0]-params.first[0]);
481 const int idx1 = max(0, m_offset[1]-params.first[1]);
482 const int idx2 = max(0, m_offset[2]-params.first[2]);
483 // number of values to read
484 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
485 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
486 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
487
488 out.requireWrite();
489 vector<ValueType> values(num0*numComp);
490 const int dpp = out.getNumDataPointsPerSample();
491
492 for (int z=0; z<num2; z++) {
493 for (int y=0; y<num1; y++) {
494 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
495 +(idx2+z)*params.numValues[0]*params.numValues[1]);
496 f.seekg(fileofs*sizeof(ValueType));
497 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
498
499 for (int x=0; x<num0; x++) {
500 const int baseIndex = first0+x*params.multiplier[0]
501 +(first1+y*params.multiplier[1])*myN0
502 +(first2+z*params.multiplier[2])*myN0*myN1;
503 for (int m2=0; m2<params.multiplier[2]; m2++) {
504 for (int m1=0; m1<params.multiplier[1]; m1++) {
505 for (int m0=0; m0<params.multiplier[0]; m0++) {
506 const int dataIndex = baseIndex+m0
507 +m1*myN0
508 +m2*myN0*myN1;
509 double* dest = out.getSampleDataRW(dataIndex);
510 for (int c=0; c<numComp; c++) {
511 ValueType val = values[x*numComp+c];
512
513 if (params.byteOrder != BYTEORDER_NATIVE) {
514 char* cval = reinterpret_cast<char*>(&val);
515 // this will alter val!!
516 byte_swap32(cval);
517 }
518 if (!std::isnan(val)) {
519 for (int q=0; q<dpp; q++) {
520 *dest++ = static_cast<double>(val);
521 }
522 }
523 }
524 }
525 }
526 }
527 }
528 }
529 }
530
531 f.close();
532 }
533
534 #ifdef USE_BOOSTIO
535 template<typename ValueType>
536 void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
537 const ReaderParameters& params) const
538 {
539 // check destination function space
540 int myN0, myN1, myN2;
541 if (out.getFunctionSpace().getTypeCode() == Nodes) {
542 myN0 = m_NN[0];
543 myN1 = m_NN[1];
544 myN2 = m_NN[2];
545 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
546 out.getFunctionSpace().getTypeCode() == ReducedElements) {
547 myN0 = m_NE[0];
548 myN1 = m_NE[1];
549 myN2 = m_NE[2];
550 } else
551 throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
552
553 if (params.first.size() != 3)
554 throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
555
556 if (params.numValues.size() != 3)
557 throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
558
559 if (params.multiplier.size() != 3)
560 throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
561 for (size_t i=0; i<params.multiplier.size(); i++)
562 if (params.multiplier[i]<1)
563 throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
564
565 // check file existence and size
566 ifstream f(filename.c_str(), ifstream::binary);
567 if (f.fail()) {
568 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
569 }
570 f.seekg(0, ios::end);
571 const int numComp = out.getDataPointSize();
572 int filesize = f.tellg();
573 f.seekg(0, ios::beg);
574 std::vector<char> compressed(filesize);
575 f.read((char*)&compressed[0], filesize);
576 f.close();
577 std::vector<char> decompressed = unzip(compressed);
578 filesize = decompressed.size();
579 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
580 if (filesize < reqsize) {
581 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
582 }
583
584 // check if this rank contributes anything
585 if (params.first[0] >= m_offset[0]+myN0 ||
586 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
587 params.first[1] >= m_offset[1]+myN1 ||
588 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
589 params.first[2] >= m_offset[2]+myN2 ||
590 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
591 return;
592 }
593
594 // now determine how much this rank has to write
595
596 // first coordinates in data object to write to
597 const int first0 = max(0, params.first[0]-m_offset[0]);
598 const int first1 = max(0, params.first[1]-m_offset[1]);
599 const int first2 = max(0, params.first[2]-m_offset[2]);
600 // indices to first value in file
601 const int idx0 = max(0, m_offset[0]-params.first[0]);
602 const int idx1 = max(0, m_offset[1]-params.first[1]);
603 const int idx2 = max(0, m_offset[2]-params.first[2]);
604 // number of values to read
605 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
606 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
607 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
608
609 out.requireWrite();
610 vector<ValueType> values(num0*numComp);
611 const int dpp = out.getNumDataPointsPerSample();
612
613 for (int z=0; z<num2; z++) {
614 for (int y=0; y<num1; y++) {
615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
616 +(idx2+z)*params.numValues[0]*params.numValues[1]);
617 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
618
619 for (int x=0; x<num0; x++) {
620 const int baseIndex = first0+x*params.multiplier[0]
621 +(first1+y*params.multiplier[1])*myN0
622 +(first2+z*params.multiplier[2])*myN0*myN1;
623 for (int m2=0; m2<params.multiplier[2]; m2++) {
624 for (int m1=0; m1<params.multiplier[1]; m1++) {
625 for (int m0=0; m0<params.multiplier[0]; m0++) {
626 const int dataIndex = baseIndex+m0
627 +m1*myN0
628 +m2*myN0*myN1;
629 double* dest = out.getSampleDataRW(dataIndex);
630 for (int c=0; c<numComp; c++) {
631 ValueType val = values[x*numComp+c];
632
633 if (params.byteOrder != BYTEORDER_NATIVE) {
634 char* cval = reinterpret_cast<char*>(&val);
635 // this will alter val!!
636 byte_swap32(cval);
637 }
638 if (!std::isnan(val)) {
639 for (int q=0; q<dpp; q++) {
640 *dest++ = static_cast<double>(val);
641 }
642 }
643 }
644 }
645 }
646 }
647 }
648 }
649 }
650 }
651 #endif
652
653 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
654 int byteOrder, int dataType) const
655 {
656 // the mapping is not universally correct but should work on our
657 // supported platforms
658 switch (dataType) {
659 case DATATYPE_INT32:
660 writeBinaryGridImpl<int>(in, filename, byteOrder);
661 break;
662 case DATATYPE_FLOAT32:
663 writeBinaryGridImpl<float>(in, filename, byteOrder);
664 break;
665 case DATATYPE_FLOAT64:
666 writeBinaryGridImpl<double>(in, filename, byteOrder);
667 break;
668 default:
669 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
670 }
671 }
672
673 template<typename ValueType>
674 void Brick::writeBinaryGridImpl(const escript::Data& in,
675 const string& filename, int byteOrder) const
676 {
677 // check function space and determine number of points
678 int myN0, myN1, myN2;
679 int totalN0, totalN1, totalN2;
680 if (in.getFunctionSpace().getTypeCode() == Nodes) {
681 myN0 = m_NN[0];
682 myN1 = m_NN[1];
683 myN2 = m_NN[2];
684 totalN0 = m_gNE[0]+1;
685 totalN1 = m_gNE[1]+1;
686 totalN2 = m_gNE[2]+1;
687 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
688 in.getFunctionSpace().getTypeCode() == ReducedElements) {
689 myN0 = m_NE[0];
690 myN1 = m_NE[1];
691 myN2 = m_NE[2];
692 totalN0 = m_gNE[0];
693 totalN1 = m_gNE[1];
694 totalN2 = m_gNE[2];
695 } else
696 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
697
698 const int numComp = in.getDataPointSize();
699 const int dpp = in.getNumDataPointsPerSample();
700 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
701
702 if (numComp > 1 || dpp > 1)
703 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
704
705 // from here on we know that each sample consists of one value
706 FileWriter fw;
707 fw.openFile(filename, fileSize);
708 MPIBarrier();
709
710 for (index_t z=0; z<myN2; z++) {
711 for (index_t y=0; y<myN1; y++) {
712 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
713 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
714 ostringstream oss;
715
716 for (index_t x=0; x<myN0; x++) {
717 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
718 ValueType fvalue = static_cast<ValueType>(*sample);
719 if (byteOrder == BYTEORDER_NATIVE) {
720 oss.write((char*)&fvalue, sizeof(fvalue));
721 } else {
722 char* value = reinterpret_cast<char*>(&fvalue);
723 oss.write(byte_swap32(value), sizeof(fvalue));
724 }
725 }
726 fw.writeAt(oss, fileofs);
727 }
728 }
729 fw.close();
730 }
731
732 void Brick::dump(const string& fileName) const
733 {
734 #if USE_SILO
735 string fn(fileName);
736 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
737 fn+=".silo";
738 }
739
740 int driver=DB_HDF5;
741 string siloPath;
742 DBfile* dbfile = NULL;
743
744 #ifdef ESYS_MPI
745 PMPIO_baton_t* baton = NULL;
746 const int NUM_SILO_FILES = 1;
747 const char* blockDirFmt = "/block%04d";
748 #endif
749
750 if (m_mpiInfo->size > 1) {
751 #ifdef ESYS_MPI
752 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
753 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
754 PMPIO_DefaultClose, (void*)&driver);
755 // try the fallback driver in case of error
756 if (!baton && driver != DB_PDB) {
757 driver = DB_PDB;
758 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
759 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
760 PMPIO_DefaultClose, (void*)&driver);
761 }
762 if (baton) {
763 char str[64];
764 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
765 siloPath = str;
766 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
767 }
768 #endif
769 } else {
770 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
771 getDescription().c_str(), driver);
772 // try the fallback driver in case of error
773 if (!dbfile && driver != DB_PDB) {
774 driver = DB_PDB;
775 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
776 getDescription().c_str(), driver);
777 }
778 }
779
780 if (!dbfile)
781 throw RipleyException("dump: Could not create Silo file");
782
783 /*
784 if (driver==DB_HDF5) {
785 // gzip level 1 already provides good compression with minimal
786 // performance penalty. Some tests showed that gzip levels >3 performed
787 // rather badly on escript data both in terms of time and space
788 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
789 }
790 */
791
792 boost::scoped_ptr<double> x(new double[m_NN[0]]);
793 boost::scoped_ptr<double> y(new double[m_NN[1]]);
794 boost::scoped_ptr<double> z(new double[m_NN[2]]);
795 double* coords[3] = { x.get(), y.get(), z.get() };
796 #pragma omp parallel
797 {
798 #pragma omp for
799 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
800 coords[0][i0]=getLocalCoordinate(i0, 0);
801 }
802 #pragma omp for
803 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
804 coords[1][i1]=getLocalCoordinate(i1, 1);
805 }
806 #pragma omp for
807 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
808 coords[2][i2]=getLocalCoordinate(i2, 2);
809 }
810 }
811 int* dims = const_cast<int*>(getNumNodesPerDim());
812
813 // write mesh
814 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
815 DB_COLLINEAR, NULL);
816
817 // write node ids
818 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
819 NULL, 0, DB_INT, DB_NODECENT, NULL);
820
821 // write element ids
822 dims = const_cast<int*>(getNumElementsPerDim());
823 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
824 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
825
826 // rank 0 writes multimesh and multivar
827 if (m_mpiInfo->rank == 0) {
828 vector<string> tempstrings;
829 vector<char*> names;
830 for (dim_t i=0; i<m_mpiInfo->size; i++) {
831 stringstream path;
832 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
833 tempstrings.push_back(path.str());
834 names.push_back((char*)tempstrings.back().c_str());
835 }
836 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
837 DBSetDir(dbfile, "/");
838 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
839 &types[0], NULL);
840 tempstrings.clear();
841 names.clear();
842 for (dim_t i=0; i<m_mpiInfo->size; i++) {
843 stringstream path;
844 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
845 tempstrings.push_back(path.str());
846 names.push_back((char*)tempstrings.back().c_str());
847 }
848 types.assign(m_mpiInfo->size, DB_QUADVAR);
849 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
850 &types[0], NULL);
851 tempstrings.clear();
852 names.clear();
853 for (dim_t i=0; i<m_mpiInfo->size; i++) {
854 stringstream path;
855 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
856 tempstrings.push_back(path.str());
857 names.push_back((char*)tempstrings.back().c_str());
858 }
859 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
860 &types[0], NULL);
861 }
862
863 if (m_mpiInfo->size > 1) {
864 #ifdef ESYS_MPI
865 PMPIO_HandOffBaton(baton, dbfile);
866 PMPIO_Finish(baton);
867 #endif
868 } else {
869 DBClose(dbfile);
870 }
871
872 #else // USE_SILO
873 throw RipleyException("dump: no Silo support");
874 #endif
875 }
876
877 const int* Brick::borrowSampleReferenceIDs(int fsType) const
878 {
879 switch (fsType) {
880 case Nodes:
881 case ReducedNodes: //FIXME: reduced
882 return &m_nodeId[0];
883 case DegreesOfFreedom:
884 case ReducedDegreesOfFreedom: //FIXME: reduced
885 return &m_dofId[0];
886 case Elements:
887 case ReducedElements:
888 return &m_elementId[0];
889 case FaceElements:
890 case ReducedFaceElements:
891 return &m_faceId[0];
892 case Points:
893 return &m_diracPointNodeIDs[0];
894 default:
895 break;
896 }
897
898 stringstream msg;
899 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
900 throw RipleyException(msg.str());
901 }
902
903 bool Brick::ownSample(int fsType, index_t id) const
904 {
905 if (getMPISize()==1)
906 return true;
907
908 switch (fsType) {
909 case Nodes:
910 case ReducedNodes: //FIXME: reduced
911 return (m_dofMap[id] < getNumDOF());
912 case DegreesOfFreedom:
913 case ReducedDegreesOfFreedom:
914 return true;
915 case Elements:
916 case ReducedElements:
917 {
918 // check ownership of element's _last_ node
919 const index_t x=id%m_NE[0] + 1;
920 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
921 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
922 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
923 }
924 case FaceElements:
925 case ReducedFaceElements:
926 {
927 // check ownership of face element's last node
928 dim_t n=0;
929 for (size_t i=0; i<6; i++) {
930 n+=m_faceCount[i];
931 if (id<n) {
932 const index_t j=id-n+m_faceCount[i];
933 if (i>=4) { // front or back
934 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
935 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
936 } else if (i>=2) { // bottom or top
937 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
938 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
939 } else { // left or right
940 const index_t first=(i==0 ? 0 : m_NN[0]-1);
941 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
942 }
943 }
944 }
945 return false;
946 }
947 default:
948 break;
949 }
950
951 stringstream msg;
952 msg << "ownSample: invalid function space type " << fsType;
953 throw RipleyException(msg.str());
954 }
955
956 void Brick::setToNormal(escript::Data& out) const
957 {
958 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
959 out.requireWrite();
960 #pragma omp parallel
961 {
962 if (m_faceOffset[0] > -1) {
963 #pragma omp for nowait
964 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
965 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
966 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
967 // set vector at four quadrature points
968 *o++ = -1.; *o++ = 0.; *o++ = 0.;
969 *o++ = -1.; *o++ = 0.; *o++ = 0.;
970 *o++ = -1.; *o++ = 0.; *o++ = 0.;
971 *o++ = -1.; *o++ = 0.; *o = 0.;
972 }
973 }
974 }
975
976 if (m_faceOffset[1] > -1) {
977 #pragma omp for nowait
978 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
979 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
980 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
981 // set vector at four quadrature points
982 *o++ = 1.; *o++ = 0.; *o++ = 0.;
983 *o++ = 1.; *o++ = 0.; *o++ = 0.;
984 *o++ = 1.; *o++ = 0.; *o++ = 0.;
985 *o++ = 1.; *o++ = 0.; *o = 0.;
986 }
987 }
988 }
989
990 if (m_faceOffset[2] > -1) {
991 #pragma omp for nowait
992 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
993 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
994 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
995 // set vector at four quadrature points
996 *o++ = 0.; *o++ = -1.; *o++ = 0.;
997 *o++ = 0.; *o++ = -1.; *o++ = 0.;
998 *o++ = 0.; *o++ = -1.; *o++ = 0.;
999 *o++ = 0.; *o++ = -1.; *o = 0.;
1000 }
1001 }
1002 }
1003
1004 if (m_faceOffset[3] > -1) {
1005 #pragma omp for nowait
1006 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1007 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1008 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1009 // set vector at four quadrature points
1010 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1011 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1012 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1013 *o++ = 0.; *o++ = 1.; *o = 0.;
1014 }
1015 }
1016 }
1017
1018 if (m_faceOffset[4] > -1) {
1019 #pragma omp for nowait
1020 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1021 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1022 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1023 // set vector at four quadrature points
1024 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1025 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1026 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1027 *o++ = 0.; *o++ = 0.; *o = -1.;
1028 }
1029 }
1030 }
1031
1032 if (m_faceOffset[5] > -1) {
1033 #pragma omp for nowait
1034 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1035 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1036 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1037 // set vector at four quadrature points
1038 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1039 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1040 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1041 *o++ = 0.; *o++ = 0.; *o = 1.;
1042 }
1043 }
1044 }
1045 } // end of parallel section
1046 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1047 out.requireWrite();
1048 #pragma omp parallel
1049 {
1050 if (m_faceOffset[0] > -1) {
1051 #pragma omp for nowait
1052 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1053 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1054 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1055 *o++ = -1.;
1056 *o++ = 0.;
1057 *o = 0.;
1058 }
1059 }
1060 }
1061
1062 if (m_faceOffset[1] > -1) {
1063 #pragma omp for nowait
1064 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1065 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1066 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1067 *o++ = 1.;
1068 *o++ = 0.;
1069 *o = 0.;
1070 }
1071 }
1072 }
1073
1074 if (m_faceOffset[2] > -1) {
1075 #pragma omp for nowait
1076 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1077 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1078 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1079 *o++ = 0.;
1080 *o++ = -1.;
1081 *o = 0.;
1082 }
1083 }
1084 }
1085
1086 if (m_faceOffset[3] > -1) {
1087 #pragma omp for nowait
1088 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1089 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1090 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1091 *o++ = 0.;
1092 *o++ = 1.;
1093 *o = 0.;
1094 }
1095 }
1096 }
1097
1098 if (m_faceOffset[4] > -1) {
1099 #pragma omp for nowait
1100 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1101 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1102 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1103 *o++ = 0.;
1104 *o++ = 0.;
1105 *o = -1.;
1106 }
1107 }
1108 }
1109
1110 if (m_faceOffset[5] > -1) {
1111 #pragma omp for nowait
1112 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1113 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1114 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1115 *o++ = 0.;
1116 *o++ = 0.;
1117 *o = 1.;
1118 }
1119 }
1120 }
1121 } // end of parallel section
1122
1123 } else {
1124 stringstream msg;
1125 msg << "setToNormal: invalid function space type "
1126 << out.getFunctionSpace().getTypeCode();
1127 throw RipleyException(msg.str());
1128 }
1129 }
1130
1131 void Brick::setToSize(escript::Data& out) const
1132 {
1133 if (out.getFunctionSpace().getTypeCode() == Elements
1134 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1135 out.requireWrite();
1136 const dim_t numQuad=out.getNumDataPointsPerSample();
1137 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1138 #pragma omp parallel for
1139 for (index_t k = 0; k < getNumElements(); ++k) {
1140 double* o = out.getSampleDataRW(k);
1141 fill(o, o+numQuad, size);
1142 }
1143 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1144 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1145 out.requireWrite();
1146 const dim_t numQuad=out.getNumDataPointsPerSample();
1147 #pragma omp parallel
1148 {
1149 if (m_faceOffset[0] > -1) {
1150 const double size=min(m_dx[1],m_dx[2]);
1151 #pragma omp for nowait
1152 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1153 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1154 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1155 fill(o, o+numQuad, size);
1156 }
1157 }
1158 }
1159
1160 if (m_faceOffset[1] > -1) {
1161 const double size=min(m_dx[1],m_dx[2]);
1162 #pragma omp for nowait
1163 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1164 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1165 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1166 fill(o, o+numQuad, size);
1167 }
1168 }
1169 }
1170
1171 if (m_faceOffset[2] > -1) {
1172 const double size=min(m_dx[0],m_dx[2]);
1173 #pragma omp for nowait
1174 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1175 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1176 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1177 fill(o, o+numQuad, size);
1178 }
1179 }
1180 }
1181
1182 if (m_faceOffset[3] > -1) {
1183 const double size=min(m_dx[0],m_dx[2]);
1184 #pragma omp for nowait
1185 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1186 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1187 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1188 fill(o, o+numQuad, size);
1189 }
1190 }
1191 }
1192
1193 if (m_faceOffset[4] > -1) {
1194 const double size=min(m_dx[0],m_dx[1]);
1195 #pragma omp for nowait
1196 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1197 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1198 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1199 fill(o, o+numQuad, size);
1200 }
1201 }
1202 }
1203
1204 if (m_faceOffset[5] > -1) {
1205 const double size=min(m_dx[0],m_dx[1]);
1206 #pragma omp for nowait
1207 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1208 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1209 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1210 fill(o, o+numQuad, size);
1211 }
1212 }
1213 }
1214 } // end of parallel section
1215
1216 } else {
1217 stringstream msg;
1218 msg << "setToSize: invalid function space type "
1219 << out.getFunctionSpace().getTypeCode();
1220 throw RipleyException(msg.str());
1221 }
1222 }
1223
1224 void Brick::Print_Mesh_Info(const bool full) const
1225 {
1226 RipleyDomain::Print_Mesh_Info(full);
1227 if (full) {
1228 cout << " Id Coordinates" << endl;
1229 cout.precision(15);
1230 cout.setf(ios::scientific, ios::floatfield);
1231 for (index_t i=0; i < getNumNodes(); i++) {
1232 cout << " " << setw(5) << m_nodeId[i]
1233 << " " << getLocalCoordinate(i%m_NN[0], 0)
1234 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1235 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1236 }
1237 }
1238 }
1239
1240
1241 //protected
1242 void Brick::assembleCoordinates(escript::Data& arg) const
1243 {
1244 escriptDataC x = arg.getDataC();
1245 int numDim = m_numDim;
1246 if (!isDataPointShapeEqual(&x, 1, &numDim))
1247 throw RipleyException("setToX: Invalid Data object shape");
1248 if (!numSamplesEqual(&x, 1, getNumNodes()))
1249 throw RipleyException("setToX: Illegal number of samples in Data object");
1250
1251 arg.requireWrite();
1252 #pragma omp parallel for
1253 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1254 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1255 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1256 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1257 point[0] = getLocalCoordinate(i0, 0);
1258 point[1] = getLocalCoordinate(i1, 1);
1259 point[2] = getLocalCoordinate(i2, 2);
1260 }
1261 }
1262 }
1263 }
1264
1265 //protected
1266 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1267 {
1268 const dim_t numComp = in.getDataPointSize();
1269 const double C0 = .044658198738520451079;
1270 const double C1 = .16666666666666666667;
1271 const double C2 = .21132486540518711775;
1272 const double C3 = .25;
1273 const double C4 = .5;
1274 const double C5 = .62200846792814621559;
1275 const double C6 = .78867513459481288225;
1276
1277 if (out.getFunctionSpace().getTypeCode() == Elements) {
1278 out.requireWrite();
1279 #pragma omp parallel
1280 {
1281 vector<double> f_000(numComp);
1282 vector<double> f_001(numComp);
1283 vector<double> f_010(numComp);
1284 vector<double> f_011(numComp);
1285 vector<double> f_100(numComp);
1286 vector<double> f_101(numComp);
1287 vector<double> f_110(numComp);
1288 vector<double> f_111(numComp);
1289 #pragma omp for
1290 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1291 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1292 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1293 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1302 for (index_t i=0; i < numComp; ++i) {
1303 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1305 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1308 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1309 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1310 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1311 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1312 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1313 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1314 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1315 o[INDEX3(i,0,0,numComp,3)] = V0;
1316 o[INDEX3(i,1,0,numComp,3)] = V4;
1317 o[INDEX3(i,2,0,numComp,3)] = V8;
1318 o[INDEX3(i,0,1,numComp,3)] = V0;
1319 o[INDEX3(i,1,1,numComp,3)] = V5;
1320 o[INDEX3(i,2,1,numComp,3)] = V9;
1321 o[INDEX3(i,0,2,numComp,3)] = V1;
1322 o[INDEX3(i,1,2,numComp,3)] = V4;
1323 o[INDEX3(i,2,2,numComp,3)] = V10;
1324 o[INDEX3(i,0,3,numComp,3)] = V1;
1325 o[INDEX3(i,1,3,numComp,3)] = V5;
1326 o[INDEX3(i,2,3,numComp,3)] = V11;
1327 o[INDEX3(i,0,4,numComp,3)] = V2;
1328 o[INDEX3(i,1,4,numComp,3)] = V6;
1329 o[INDEX3(i,2,4,numComp,3)] = V8;
1330 o[INDEX3(i,0,5,numComp,3)] = V2;
1331 o[INDEX3(i,1,5,numComp,3)] = V7;
1332 o[INDEX3(i,2,5,numComp,3)] = V9;
1333 o[INDEX3(i,0,6,numComp,3)] = V3;
1334 o[INDEX3(i,1,6,numComp,3)] = V6;
1335 o[INDEX3(i,2,6,numComp,3)] = V10;
1336 o[INDEX3(i,0,7,numComp,3)] = V3;
1337 o[INDEX3(i,1,7,numComp,3)] = V7;
1338 o[INDEX3(i,2,7,numComp,3)] = V11;
1339 } // end of component loop i
1340 } // end of k0 loop
1341 } // end of k1 loop
1342 } // end of k2 loop
1343 } // end of parallel section
1344 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1345 out.requireWrite();
1346 #pragma omp parallel
1347 {
1348 vector<double> f_000(numComp);
1349 vector<double> f_001(numComp);
1350 vector<double> f_010(numComp);
1351 vector<double> f_011(numComp);
1352 vector<double> f_100(numComp);
1353 vector<double> f_101(numComp);
1354 vector<double> f_110(numComp);
1355 vector<double> f_111(numComp);
1356 #pragma omp for
1357 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1358 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1359 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1360 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1369 for (index_t i=0; i < numComp; ++i) {
1370 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1371 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1372 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1373 } // end of component loop i
1374 } // end of k0 loop
1375 } // end of k1 loop
1376 } // end of k2 loop
1377 } // end of parallel section
1378 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1379 out.requireWrite();
1380 #pragma omp parallel
1381 {
1382 vector<double> f_000(numComp);
1383 vector<double> f_001(numComp);
1384 vector<double> f_010(numComp);
1385 vector<double> f_011(numComp);
1386 vector<double> f_100(numComp);
1387 vector<double> f_101(numComp);
1388 vector<double> f_110(numComp);
1389 vector<double> f_111(numComp);
1390 if (m_faceOffset[0] > -1) {
1391 #pragma omp for nowait
1392 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1393 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1394 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1403 for (index_t i=0; i < numComp; ++i) {
1404 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1405 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1406 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1407 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1408 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1409 o[INDEX3(i,1,0,numComp,3)] = V0;
1410 o[INDEX3(i,2,0,numComp,3)] = V2;
1411 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1412 o[INDEX3(i,1,1,numComp,3)] = V0;
1413 o[INDEX3(i,2,1,numComp,3)] = V3;
1414 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1415 o[INDEX3(i,1,2,numComp,3)] = V1;
1416 o[INDEX3(i,2,2,numComp,3)] = V2;
1417 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1418 o[INDEX3(i,1,3,numComp,3)] = V1;
1419 o[INDEX3(i,2,3,numComp,3)] = V3;
1420 } // end of component loop i
1421 } // end of k1 loop
1422 } // end of k2 loop
1423 } // end of face 0
1424 if (m_faceOffset[1] > -1) {
1425 #pragma omp for nowait
1426 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1427 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1428 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1437 for (index_t i=0; i < numComp; ++i) {
1438 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1439 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1440 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1441 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1442 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1443 o[INDEX3(i,1,0,numComp,3)] = V0;
1444 o[INDEX3(i,2,0,numComp,3)] = V2;
1445 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1446 o[INDEX3(i,1,1,numComp,3)] = V0;
1447 o[INDEX3(i,2,1,numComp,3)] = V3;
1448 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1449 o[INDEX3(i,1,2,numComp,3)] = V1;
1450 o[INDEX3(i,2,2,numComp,3)] = V2;
1451 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1452 o[INDEX3(i,1,3,numComp,3)] = V1;
1453 o[INDEX3(i,2,3,numComp,3)] = V3;
1454 } // end of component loop i
1455 } // end of k1 loop
1456 } // end of k2 loop
1457 } // end of face 1
1458 if (m_faceOffset[2] > -1) {
1459 #pragma omp for nowait
1460 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1461 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1462 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1471 for (index_t i=0; i < numComp; ++i) {
1472 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1473 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1474 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1475 o[INDEX3(i,0,0,numComp,3)] = V0;
1476 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1477 o[INDEX3(i,2,0,numComp,3)] = V1;
1478 o[INDEX3(i,0,1,numComp,3)] = V0;
1479 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1480 o[INDEX3(i,2,1,numComp,3)] = V2;
1481 o[INDEX3(i,0,2,numComp,3)] = V0;
1482 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1483 o[INDEX3(i,2,2,numComp,3)] = V1;
1484 o[INDEX3(i,0,3,numComp,3)] = V0;
1485 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1486 o[INDEX3(i,2,3,numComp,3)] = V2;
1487 } // end of component loop i
1488 } // end of k0 loop
1489 } // end of k2 loop
1490 } // end of face 2
1491 if (m_faceOffset[3] > -1) {
1492 #pragma omp for nowait
1493 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1494 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1495 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1496 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1504 for (index_t i=0; i < numComp; ++i) {
1505 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1506 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1507 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1508 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1509 o[INDEX3(i,0,0,numComp,3)] = V0;
1510 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1511 o[INDEX3(i,2,0,numComp,3)] = V2;
1512 o[INDEX3(i,0,1,numComp,3)] = V0;
1513 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1514 o[INDEX3(i,2,1,numComp,3)] = V3;
1515 o[INDEX3(i,0,2,numComp,3)] = V1;
1516 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1517 o[INDEX3(i,2,2,numComp,3)] = V2;
1518 o[INDEX3(i,0,3,numComp,3)] = V1;
1519 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1520 o[INDEX3(i,2,3,numComp,3)] = V3;
1521 } // end of component loop i
1522 } // end of k0 loop
1523 } // end of k2 loop
1524 } // end of face 3
1525 if (m_faceOffset[4] > -1) {
1526 #pragma omp for nowait
1527 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1528 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1529 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1538 for (index_t i=0; i < numComp; ++i) {
1539 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1540 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1541 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1542 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1543 o[INDEX3(i,0,0,numComp,3)] = V0;
1544 o[INDEX3(i,1,0,numComp,3)] = V2;
1545 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1546 o[INDEX3(i,0,1,numComp,3)] = V0;
1547 o[INDEX3(i,1,1,numComp,3)] = V3;
1548 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1549 o[INDEX3(i,0,2,numComp,3)] = V1;
1550 o[INDEX3(i,1,2,numComp,3)] = V2;
1551 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1552 o[INDEX3(i,0,3,numComp,3)] = V1;
1553 o[INDEX3(i,1,3,numComp,3)] = V3;
1554 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1555 } // end of component loop i
1556 } // end of k0 loop
1557 } // end of k1 loop
1558 } // end of face 4
1559 if (m_faceOffset[5] > -1) {
1560 #pragma omp for nowait
1561 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1562 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1563 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1564 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1572 for (index_t i=0; i < numComp; ++i) {
1573 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1574 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1575 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1576 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1577 o[INDEX3(i,0,0,numComp,3)] = V0;
1578 o[INDEX3(i,1,0,numComp,3)] = V2;
1579 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1580 o[INDEX3(i,0,1,numComp,3)] = V0;
1581 o[INDEX3(i,1,1,numComp,3)] = V3;
1582 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1583 o[INDEX3(i,0,2,numComp,3)] = V1;
1584 o[INDEX3(i,1,2,numComp,3)] = V2;
1585 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1586 o[INDEX3(i,0,3,numComp,3)] = V1;
1587 o[INDEX3(i,1,3,numComp,3)] = V3;
1588 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1589 } // end of component loop i
1590 } // end of k0 loop
1591 } // end of k1 loop
1592 } // end of face 5
1593 } // end of parallel section
1594 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1595 out.requireWrite();
1596 #pragma omp parallel
1597 {
1598 vector<double> f_000(numComp);
1599 vector<double> f_001(numComp);
1600 vector<double> f_010(numComp);
1601 vector<double> f_011(numComp);
1602 vector<double> f_100(numComp);
1603 vector<double> f_101(numComp);
1604 vector<double> f_110(numComp);
1605 vector<double> f_111(numComp);
1606 if (m_faceOffset[0] > -1) {
1607 #pragma omp for nowait
1608 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1609 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1610 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1619 for (index_t i=0; i < numComp; ++i) {
1620 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1621 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1622 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1623 } // end of component loop i
1624 } // end of k1 loop
1625 } // end of k2 loop
1626 } // end of face 0
1627 if (m_faceOffset[1] > -1) {
1628 #pragma omp for nowait
1629 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1630 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1631 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1632 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1633 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1634 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1635 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1636 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1637 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1638 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1639 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1640 for (index_t i=0; i < numComp; ++i) {
1641 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1642 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1643 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1644 } // end of component loop i
1645 } // end of k1 loop
1646 } // end of k2 loop
1647 } // end of face 1
1648 if (m_faceOffset[2] > -1) {
1649 #pragma omp for nowait
1650 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1651 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1652 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1653 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1654 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1655 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1656 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1657 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1658 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1659 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1660 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1661 for (index_t i=0; i < numComp; ++i) {
1662 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1663 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1664 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1665 } // end of component loop i
1666 } // end of k0 loop
1667 } // end of k2 loop
1668 } // end of face 2
1669 if (m_faceOffset[3] > -1) {
1670 #pragma omp for nowait
1671 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1672 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1673 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1674 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1675 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1676 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1677 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1678 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1679 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1680 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1681 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1682 for (index_t i=0; i < numComp; ++i) {
1683 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1684 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1685 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1686 } // end of component loop i
1687 } // end of k0 loop
1688 } // end of k2 loop
1689 } // end of face 3
1690 if (m_faceOffset[4] > -1) {
1691 #pragma omp for nowait
1692 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1693 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1694 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1695 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1696 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1697 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1698 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1699 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1700 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1701 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1702 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1703 for (index_t i=0; i < numComp; ++i) {
1704 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1705 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1706 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1707 } // end of component loop i
1708 } // end of k0 loop
1709 } // end of k1 loop
1710 } // end of face 4
1711 if (m_faceOffset[5] > -1) {
1712 #pragma omp for nowait
1713 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1714 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1715 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1716 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1717 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1718 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1719 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1720 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1721 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1722 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1723 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1724 for (index_t i=0; i < numComp; ++i) {
1725 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1726 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1727 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1728 } // end of component loop i
1729 } // end of k0 loop
1730 } // end of k1 loop
1731 } // end of face 5
1732 } // end of parallel section
1733 }
1734 }
1735
1736 //protected
1737 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1738 {
1739 const dim_t numComp = arg.getDataPointSize();
1740 const index_t left = (m_offset[0]==0 ? 0 : 1);
1741 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1742 const index_t front = (m_offset[2]==0 ? 0 : 1);
1743 const int fs = arg.getFunctionSpace().getTypeCode();
1744 if (fs == Elements && arg.actsExpanded()) {
1745 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1746 #pragma omp parallel
1747 {
1748 vector<double> int_local(numComp, 0);
1749 #pragma omp for nowait
1750 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1751 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1752 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1753 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1754 for (index_t i=0; i < numComp; ++i) {
1755 const double f_0 = f[INDEX2(i,0,numComp)];
1756 const double f_1 = f[INDEX2(i,1,numComp)];
1757 const double f_2 = f[INDEX2(i,2,numComp)];
1758 const double f_3 = f[INDEX2(i,3,numComp)];
1759 const double f_4 = f[INDEX2(i,4,numComp)];
1760 const double f_5 = f[INDEX2(i,5,numComp)];
1761 const double f_6 = f[INDEX2(i,6,numComp)];
1762 const double f_7 = f[INDEX2(i,7,numComp)];
1763 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1764 } // end of component loop i
1765 } // end of k0 loop
1766 } // end of k1 loop
1767 } // end of k2 loop
1768
1769 #pragma omp critical
1770 for (index_t i=0; i<numComp; i++)
1771 integrals[i]+=int_local[i];
1772 } // end of parallel section
1773
1774 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1775 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1776 #pragma omp parallel
1777 {
1778 vector<double> int_local(numComp, 0);
1779 #pragma omp for nowait
1780 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1781 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1782 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1783 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1784 for (index_t i=0; i < numComp; ++i) {
1785 int_local[i]+=f[i]*w_0;
1786 } // end of component loop i
1787 } // end of k0 loop
1788 } // end of k1 loop
1789 } // end of k2 loop
1790
1791 #pragma omp critical
1792 for (index_t i=0; i<numComp; i++)
1793 integrals[i]+=int_local[i];
1794 } // end of parallel section
1795
1796 } else if (fs == FaceElements && arg.actsExpanded()) {
1797 const double w_0 = m_dx[1]*m_dx[2]/4.;
1798 const double w_1 = m_dx[0]*m_dx[2]/4.;
1799 const double w_2 = m_dx[0]*m_dx[1]/4.;
1800 #pragma omp parallel
1801 {
1802 vector<double> int_local(numComp, 0);
1803 if (m_faceOffset[0] > -1) {
1804 #pragma omp for nowait
1805 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1806 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1807 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1808 for (index_t i=0; i < numComp; ++i) {
1809 const double f_0 = f[INDEX2(i,0,numComp)];
1810 const double f_1 = f[INDEX2(i,1,numComp)];
1811 const double f_2 = f[INDEX2(i,2,numComp)];
1812 const double f_3 = f[INDEX2(i,3,numComp)];
1813 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1814 } // end of component loop i
1815 } // end of k1 loop