/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4687 - (show annotations)
Wed Feb 19 00:03:29 2014 UTC (5 years, 4 months ago) by jfenwick
File size: 159178 byte(s)
Remove randomFill python method from ripley domains.
All random data objects (for all domain types) should be generated 
using esys.escript.RandomData()

The only filtered random we have is gaussian on ripley but
it is triggered by passing the tuple as the last arg of RandomData().

While the interface is a bit more complicated (in that you always need
 to pass in shape and functionspace) it does mean we have a 
common interface for all domains. 

Removed randomFill from DataExpanded.
The reasoning behind this is to force domains to call the util function
themselves and enforce whatever consistancy requirements they have.

Added version of blocktools to deal with 2D case in Ripley.
Use blocktools for the 2D transfers [This was cleaner than modifying the
previous implementation to deal with variable shaped points].

Note that under MPI, ripley can not generate random data (even unfiltered)
if any of its per rank dimensions is <4 elements on any side.

Unit tests for these calls are in but some extra checks still needed.



1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Brick.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler3D.h>
21 #include <ripley/WaveAssembler3D.h>
22 #include <boost/scoped_array.hpp>
23
24 #ifdef USE_NETCDF
25 #include <netcdfcpp.h>
26 #endif
27
28 #if USE_SILO
29 #include <silo.h>
30 #ifdef ESYS_MPI
31 #include <pmpio.h>
32 #endif
33 #endif
34
35 #include <iomanip>
36
37 #include "esysUtils/EsysRandom.h"
38 #include "blocktools.h"
39
40
41 using namespace std;
42 using esysUtils::FileWriter;
43
44 namespace ripley {
45
46 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
47 double x1, double y1, double z1, int d0, int d1, int d2,
48 const std::vector<double>& points, const std::vector<int>& tags,
49 const simap_t& tagnamestonums) :
50 RipleyDomain(3)
51 {
52 // ignore subdivision parameters for serial run
53 if (m_mpiInfo->size == 1) {
54 d0=1;
55 d1=1;
56 d2=1;
57 }
58 bool warn=false;
59 // if number of subdivisions is non-positive, try to subdivide by the same
60 // ratio as the number of elements
61 if (d0<=0 && d1<=0 && d2<=0) {
62 warn=true;
63 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
64 d0=max(1, d0);
65 d1=max(1, (int)(d0*n1/(float)n0));
66 d2=m_mpiInfo->size/(d0*d1);
67 if (d0*d1*d2 != m_mpiInfo->size) {
68 // ratios not the same so leave "smallest" side undivided and try
69 // dividing 2 sides only
70 if (n0>=n1) {
71 if (n1>=n2) {
72 d0=d1=0;
73 d2=1;
74 } else {
75 d0=d2=0;
76 d1=1;
77 }
78 } else {
79 if (n0>=n2) {
80 d0=d1=0;
81 d2=1;
82 } else {
83 d0=1;
84 d1=d2=0;
85 }
86 }
87 }
88 }
89 if (d0<=0 && d1<=0) {
90 warn=true;
91 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
92 d1=m_mpiInfo->size/d0;
93 if (d0*d1*d2 != m_mpiInfo->size) {
94 // ratios not the same so subdivide side with more elements only
95 if (n0>n1) {
96 d0=0;
97 d1=1;
98 } else {
99 d0=1;
100 d1=0;
101 }
102 }
103 } else if (d0<=0 && d2<=0) {
104 warn=true;
105 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
106 d2=m_mpiInfo->size/d0;
107 if (d0*d1*d2 != m_mpiInfo->size) {
108 // ratios not the same so subdivide side with more elements only
109 if (n0>n2) {
110 d0=0;
111 d2=1;
112 } else {
113 d0=1;
114 d2=0;
115 }
116 }
117 } else if (d1<=0 && d2<=0) {
118 warn=true;
119 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
120 d2=m_mpiInfo->size/d1;
121 if (d0*d1*d2 != m_mpiInfo->size) {
122 // ratios not the same so subdivide side with more elements only
123 if (n1>n2) {
124 d1=0;
125 d2=1;
126 } else {
127 d1=1;
128 d2=0;
129 }
130 }
131 }
132 if (d0<=0) {
133 // d1,d2 are preset, determine d0
134 d0=m_mpiInfo->size/(d1*d2);
135 } else if (d1<=0) {
136 // d0,d2 are preset, determine d1
137 d1=m_mpiInfo->size/(d0*d2);
138 } else if (d2<=0) {
139 // d0,d1 are preset, determine d2
140 d2=m_mpiInfo->size/(d0*d1);
141 }
142
143 // ensure number of subdivisions is valid and nodes can be distributed
144 // among number of ranks
145 if (d0*d1*d2 != m_mpiInfo->size)
146 throw RipleyException("Invalid number of spatial subdivisions");
147
148 if (warn) {
149 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
150 << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
151 }
152
153 double l0 = x1-x0;
154 double l1 = y1-y0;
155 double l2 = z1-z0;
156 m_dx[0] = l0/n0;
157 m_dx[1] = l1/n1;
158 m_dx[2] = l2/n2;
159
160 if ((n0+1)%d0 > 0) {
161 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
162 l0=m_dx[0]*n0;
163 cout << "Warning: Adjusted number of elements and length. N0="
164 << n0 << ", l0=" << l0 << endl;
165 }
166 if ((n1+1)%d1 > 0) {
167 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
168 l1=m_dx[1]*n1;
169 cout << "Warning: Adjusted number of elements and length. N1="
170 << n1 << ", l1=" << l1 << endl;
171 }
172 if ((n2+1)%d2 > 0) {
173 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
174 l2=m_dx[2]*n2;
175 cout << "Warning: Adjusted number of elements and length. N2="
176 << n2 << ", l2=" << l2 << endl;
177 }
178
179 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
180 throw RipleyException("Too few elements for the number of ranks");
181
182 m_gNE[0] = n0;
183 m_gNE[1] = n1;
184 m_gNE[2] = n2;
185 m_origin[0] = x0;
186 m_origin[1] = y0;
187 m_origin[2] = z0;
188 m_length[0] = l0;
189 m_length[1] = l1;
190 m_length[2] = l2;
191 m_NX[0] = d0;
192 m_NX[1] = d1;
193 m_NX[2] = d2;
194
195 // local number of elements (including overlap)
196 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
197 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
198 m_NE[0]++;
199 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
200 m_ownNE[0]--;
201
202 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
203 if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
204 m_NE[1]++;
205 else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
206 m_ownNE[1]--;
207
208 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
209 if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
210 m_NE[2]++;
211 else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
212 m_ownNE[2]--;
213
214 // local number of nodes
215 m_NN[0] = m_NE[0]+1;
216 m_NN[1] = m_NE[1]+1;
217 m_NN[2] = m_NE[2]+1;
218
219 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
220 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
221 if (m_offset[0] > 0)
222 m_offset[0]--;
223 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
224 if (m_offset[1] > 0)
225 m_offset[1]--;
226 m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
227 if (m_offset[2] > 0)
228 m_offset[2]--;
229
230 populateSampleIds();
231 createPattern();
232
233 assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
234 for (map<string, int>::const_iterator i = tagnamestonums.begin();
235 i != tagnamestonums.end(); i++) {
236 setTagMap(i->first, i->second);
237 }
238 addPoints(tags.size(), &points[0], &tags[0]);
239 }
240
241
242 Brick::~Brick()
243 {
244 Paso_SystemMatrixPattern_free(m_pattern);
245 Paso_Connector_free(m_connector);
246 delete assembler;
247 }
248
249 string Brick::getDescription() const
250 {
251 return "ripley::Brick";
252 }
253
254 bool Brick::operator==(const AbstractDomain& other) const
255 {
256 const Brick* o=dynamic_cast<const Brick*>(&other);
257 if (o) {
258 return (RipleyDomain::operator==(other) &&
259 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
260 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
261 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
262 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
263 }
264
265 return false;
266 }
267
268 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
269 const ReaderParameters& params) const
270 {
271 #ifdef USE_NETCDF
272 // check destination function space
273 int myN0, myN1, myN2;
274 if (out.getFunctionSpace().getTypeCode() == Nodes) {
275 myN0 = m_NN[0];
276 myN1 = m_NN[1];
277 myN2 = m_NN[2];
278 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
279 out.getFunctionSpace().getTypeCode() == ReducedElements) {
280 myN0 = m_NE[0];
281 myN1 = m_NE[1];
282 myN2 = m_NE[2];
283 } else
284 throw RipleyException("readNcGrid(): invalid function space for output data object");
285
286 if (params.first.size() != 3)
287 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
288
289 if (params.numValues.size() != 3)
290 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
291
292 if (params.multiplier.size() != 3)
293 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
294 for (size_t i=0; i<params.multiplier.size(); i++)
295 if (params.multiplier[i]<1)
296 throw RipleyException("readNcGrid(): all multipliers must be positive");
297
298 // check file existence and size
299 NcFile f(filename.c_str(), NcFile::ReadOnly);
300 if (!f.is_valid())
301 throw RipleyException("readNcGrid(): cannot open file");
302
303 NcVar* var = f.get_var(varname.c_str());
304 if (!var)
305 throw RipleyException("readNcGrid(): invalid variable name");
306
307 // TODO: rank>0 data support
308 const int numComp = out.getDataPointSize();
309 if (numComp > 1)
310 throw RipleyException("readNcGrid(): only scalar data supported");
311
312 const int dims = var->num_dims();
313 boost::scoped_array<long> edges(var->edges());
314
315 // is this a slice of the data object (dims!=3)?
316 // note the expected ordering of edges (as in numpy: z,y,x)
317 if ( (dims==3 && (params.numValues[2] > edges[0] ||
318 params.numValues[1] > edges[1] ||
319 params.numValues[0] > edges[2]))
320 || (dims==2 && params.numValues[2]>1)
321 || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
322 throw RipleyException("readNcGrid(): not enough data in file");
323 }
324
325 // check if this rank contributes anything
326 if (params.first[0] >= m_offset[0]+myN0 ||
327 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
328 params.first[1] >= m_offset[1]+myN1 ||
329 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
330 params.first[2] >= m_offset[2]+myN2 ||
331 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
332 return;
333 }
334
335 // now determine how much this rank has to write
336
337 // first coordinates in data object to write to
338 const int first0 = max(0, params.first[0]-m_offset[0]);
339 const int first1 = max(0, params.first[1]-m_offset[1]);
340 const int first2 = max(0, params.first[2]-m_offset[2]);
341 // indices to first value in file (not accounting for reverse yet)
342 int idx0 = max(0, m_offset[0]-params.first[0]);
343 int idx1 = max(0, m_offset[1]-params.first[1]);
344 int idx2 = max(0, m_offset[2]-params.first[2]);
345 // number of values to read
346 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
347 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
348 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
349
350 // make sure we read the right block if going backwards through file
351 if (params.reverse[0])
352 idx0 = edges[dims-1]-num0-idx0;
353 if (dims>1 && params.reverse[1])
354 idx1 = edges[dims-2]-num1-idx1;
355 if (dims>2 && params.reverse[2])
356 idx2 = edges[dims-3]-num2-idx2;
357
358
359 vector<double> values(num0*num1*num2);
360 if (dims==3) {
361 var->set_cur(idx2, idx1, idx0);
362 var->get(&values[0], num2, num1, num0);
363 } else if (dims==2) {
364 var->set_cur(idx1, idx0);
365 var->get(&values[0], num1, num0);
366 } else {
367 var->set_cur(idx0);
368 var->get(&values[0], num0);
369 }
370
371 const int dpp = out.getNumDataPointsPerSample();
372 out.requireWrite();
373
374 // helpers for reversing
375 const int x0 = (params.reverse[0] ? num0-1 : 0);
376 const int x_mult = (params.reverse[0] ? -1 : 1);
377 const int y0 = (params.reverse[1] ? num1-1 : 0);
378 const int y_mult = (params.reverse[1] ? -1 : 1);
379 const int z0 = (params.reverse[2] ? num2-1 : 0);
380 const int z_mult = (params.reverse[2] ? -1 : 1);
381
382 for (index_t z=0; z<num2; z++) {
383 for (index_t y=0; y<num1; y++) {
384 #pragma omp parallel for
385 for (index_t x=0; x<num0; x++) {
386 const int baseIndex = first0+x*params.multiplier[0]
387 +(first1+y*params.multiplier[1])*myN0
388 +(first2+z*params.multiplier[2])*myN0*myN1;
389 const int srcIndex=(z0+z_mult*z)*num1*num0
390 +(y0+y_mult*y)*num0
391 +(x0+x_mult*x);
392 if (!isnan(values[srcIndex])) {
393 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
394 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
395 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
396 const int dataIndex = baseIndex+m0
397 +m1*myN0
398 +m2*myN0*myN1;
399 double* dest = out.getSampleDataRW(dataIndex);
400 for (index_t q=0; q<dpp; q++) {
401 *dest++ = values[srcIndex];
402 }
403 }
404 }
405 }
406 }
407 }
408 }
409 }
410 #else
411 throw RipleyException("readNcGrid(): not compiled with netCDF support");
412 #endif
413 }
414
415 void Brick::readBinaryGrid(escript::Data& out, string filename,
416 const ReaderParameters& params) const
417 {
418 // the mapping is not universally correct but should work on our
419 // supported platforms
420 switch (params.dataType) {
421 case DATATYPE_INT32:
422 readBinaryGridImpl<int>(out, filename, params);
423 break;
424 case DATATYPE_FLOAT32:
425 readBinaryGridImpl<float>(out, filename, params);
426 break;
427 case DATATYPE_FLOAT64:
428 readBinaryGridImpl<double>(out, filename, params);
429 break;
430 default:
431 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
432 }
433 }
434
435 template<typename ValueType>
436 void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
437 const ReaderParameters& params) const
438 {
439 // check destination function space
440 int myN0, myN1, myN2;
441 if (out.getFunctionSpace().getTypeCode() == Nodes) {
442 myN0 = m_NN[0];
443 myN1 = m_NN[1];
444 myN2 = m_NN[2];
445 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
446 out.getFunctionSpace().getTypeCode() == ReducedElements) {
447 myN0 = m_NE[0];
448 myN1 = m_NE[1];
449 myN2 = m_NE[2];
450 } else
451 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
452
453 if (params.first.size() != 3)
454 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
455
456 if (params.numValues.size() != 3)
457 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
458
459 if (params.multiplier.size() != 3)
460 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
461 for (size_t i=0; i<params.multiplier.size(); i++)
462 if (params.multiplier[i]<1)
463 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
464
465 // check file existence and size
466 ifstream f(filename.c_str(), ifstream::binary);
467 if (f.fail()) {
468 throw RipleyException("readBinaryGrid(): cannot open file");
469 }
470 f.seekg(0, ios::end);
471 const int numComp = out.getDataPointSize();
472 const int filesize = f.tellg();
473 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
474 if (filesize < reqsize) {
475 f.close();
476 throw RipleyException("readBinaryGrid(): not enough data in file");
477 }
478
479 // check if this rank contributes anything
480 if (params.first[0] >= m_offset[0]+myN0 ||
481 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
482 params.first[1] >= m_offset[1]+myN1 ||
483 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
484 params.first[2] >= m_offset[2]+myN2 ||
485 params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
486 f.close();
487 return;
488 }
489
490 // now determine how much this rank has to write
491
492 // first coordinates in data object to write to
493 const int first0 = max(0, params.first[0]-m_offset[0]);
494 const int first1 = max(0, params.first[1]-m_offset[1]);
495 const int first2 = max(0, params.first[2]-m_offset[2]);
496 // indices to first value in file
497 const int idx0 = max(0, m_offset[0]-params.first[0]);
498 const int idx1 = max(0, m_offset[1]-params.first[1]);
499 const int idx2 = max(0, m_offset[2]-params.first[2]);
500 // number of values to read
501 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
502 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
503 const int num2 = min(params.numValues[2]-idx2, myN2-first2);
504
505 out.requireWrite();
506 vector<ValueType> values(num0*numComp);
507 const int dpp = out.getNumDataPointsPerSample();
508
509 for (int z=0; z<num2; z++) {
510 for (int y=0; y<num1; y++) {
511 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
512 +(idx2+z)*params.numValues[0]*params.numValues[1]);
513 f.seekg(fileofs*sizeof(ValueType));
514 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
515
516 for (int x=0; x<num0; x++) {
517 const int baseIndex = first0+x*params.multiplier[0]
518 +(first1+y*params.multiplier[1])*myN0
519 +(first2+z*params.multiplier[2])*myN0*myN1;
520 for (int m2=0; m2<params.multiplier[2]; m2++) {
521 for (int m1=0; m1<params.multiplier[1]; m1++) {
522 for (int m0=0; m0<params.multiplier[0]; m0++) {
523 const int dataIndex = baseIndex+m0
524 +m1*myN0
525 +m2*myN0*myN1;
526 double* dest = out.getSampleDataRW(dataIndex);
527 for (int c=0; c<numComp; c++) {
528 ValueType val = values[x*numComp+c];
529
530 if (params.byteOrder != BYTEORDER_NATIVE) {
531 char* cval = reinterpret_cast<char*>(&val);
532 // this will alter val!!
533 byte_swap32(cval);
534 }
535 if (!std::isnan(val)) {
536 for (int q=0; q<dpp; q++) {
537 *dest++ = static_cast<double>(val);
538 }
539 }
540 }
541 }
542 }
543 }
544 }
545 }
546 }
547
548 f.close();
549 }
550
551 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
552 int byteOrder, int dataType) const
553 {
554 // the mapping is not universally correct but should work on our
555 // supported platforms
556 switch (dataType) {
557 case DATATYPE_INT32:
558 writeBinaryGridImpl<int>(in, filename, byteOrder);
559 break;
560 case DATATYPE_FLOAT32:
561 writeBinaryGridImpl<float>(in, filename, byteOrder);
562 break;
563 case DATATYPE_FLOAT64:
564 writeBinaryGridImpl<double>(in, filename, byteOrder);
565 break;
566 default:
567 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
568 }
569 }
570
571 template<typename ValueType>
572 void Brick::writeBinaryGridImpl(const escript::Data& in,
573 const string& filename, int byteOrder) const
574 {
575 // check function space and determine number of points
576 int myN0, myN1, myN2;
577 int totalN0, totalN1, totalN2;
578 if (in.getFunctionSpace().getTypeCode() == Nodes) {
579 myN0 = m_NN[0];
580 myN1 = m_NN[1];
581 myN2 = m_NN[2];
582 totalN0 = m_gNE[0]+1;
583 totalN1 = m_gNE[1]+1;
584 totalN2 = m_gNE[2]+1;
585 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
586 in.getFunctionSpace().getTypeCode() == ReducedElements) {
587 myN0 = m_NE[0];
588 myN1 = m_NE[1];
589 myN2 = m_NE[2];
590 totalN0 = m_gNE[0];
591 totalN1 = m_gNE[1];
592 totalN2 = m_gNE[2];
593 } else
594 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
595
596 const int numComp = in.getDataPointSize();
597 const int dpp = in.getNumDataPointsPerSample();
598 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
599
600 if (numComp > 1 || dpp > 1)
601 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
602
603 // from here on we know that each sample consists of one value
604 FileWriter fw;
605 fw.openFile(filename, fileSize);
606 MPIBarrier();
607
608 for (index_t z=0; z<myN2; z++) {
609 for (index_t y=0; y<myN1; y++) {
610 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
611 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
612 ostringstream oss;
613
614 for (index_t x=0; x<myN0; x++) {
615 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
616 ValueType fvalue = static_cast<ValueType>(*sample);
617 if (byteOrder == BYTEORDER_NATIVE) {
618 oss.write((char*)&fvalue, sizeof(fvalue));
619 } else {
620 char* value = reinterpret_cast<char*>(&fvalue);
621 oss.write(byte_swap32(value), sizeof(fvalue));
622 }
623 }
624 fw.writeAt(oss, fileofs);
625 }
626 }
627 fw.close();
628 }
629
630 void Brick::dump(const string& fileName) const
631 {
632 #if USE_SILO
633 string fn(fileName);
634 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
635 fn+=".silo";
636 }
637
638 int driver=DB_HDF5;
639 string siloPath;
640 DBfile* dbfile = NULL;
641
642 #ifdef ESYS_MPI
643 PMPIO_baton_t* baton = NULL;
644 const int NUM_SILO_FILES = 1;
645 const char* blockDirFmt = "/block%04d";
646 #endif
647
648 if (m_mpiInfo->size > 1) {
649 #ifdef ESYS_MPI
650 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
651 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
652 PMPIO_DefaultClose, (void*)&driver);
653 // try the fallback driver in case of error
654 if (!baton && driver != DB_PDB) {
655 driver = DB_PDB;
656 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
657 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
658 PMPIO_DefaultClose, (void*)&driver);
659 }
660 if (baton) {
661 char str[64];
662 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
663 siloPath = str;
664 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
665 }
666 #endif
667 } else {
668 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669 getDescription().c_str(), driver);
670 // try the fallback driver in case of error
671 if (!dbfile && driver != DB_PDB) {
672 driver = DB_PDB;
673 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
674 getDescription().c_str(), driver);
675 }
676 }
677
678 if (!dbfile)
679 throw RipleyException("dump: Could not create Silo file");
680
681 /*
682 if (driver==DB_HDF5) {
683 // gzip level 1 already provides good compression with minimal
684 // performance penalty. Some tests showed that gzip levels >3 performed
685 // rather badly on escript data both in terms of time and space
686 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
687 }
688 */
689
690 boost::scoped_ptr<double> x(new double[m_NN[0]]);
691 boost::scoped_ptr<double> y(new double[m_NN[1]]);
692 boost::scoped_ptr<double> z(new double[m_NN[2]]);
693 double* coords[3] = { x.get(), y.get(), z.get() };
694 #pragma omp parallel
695 {
696 #pragma omp for
697 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
698 coords[0][i0]=getLocalCoordinate(i0, 0);
699 }
700 #pragma omp for
701 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
702 coords[1][i1]=getLocalCoordinate(i1, 1);
703 }
704 #pragma omp for
705 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
706 coords[2][i2]=getLocalCoordinate(i2, 2);
707 }
708 }
709 int* dims = const_cast<int*>(getNumNodesPerDim());
710
711 // write mesh
712 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
713 DB_COLLINEAR, NULL);
714
715 // write node ids
716 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
717 NULL, 0, DB_INT, DB_NODECENT, NULL);
718
719 // write element ids
720 dims = const_cast<int*>(getNumElementsPerDim());
721 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
722 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
723
724 // rank 0 writes multimesh and multivar
725 if (m_mpiInfo->rank == 0) {
726 vector<string> tempstrings;
727 vector<char*> names;
728 for (dim_t i=0; i<m_mpiInfo->size; i++) {
729 stringstream path;
730 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
731 tempstrings.push_back(path.str());
732 names.push_back((char*)tempstrings.back().c_str());
733 }
734 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
735 DBSetDir(dbfile, "/");
736 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
737 &types[0], NULL);
738 tempstrings.clear();
739 names.clear();
740 for (dim_t i=0; i<m_mpiInfo->size; i++) {
741 stringstream path;
742 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
743 tempstrings.push_back(path.str());
744 names.push_back((char*)tempstrings.back().c_str());
745 }
746 types.assign(m_mpiInfo->size, DB_QUADVAR);
747 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
748 &types[0], NULL);
749 tempstrings.clear();
750 names.clear();
751 for (dim_t i=0; i<m_mpiInfo->size; i++) {
752 stringstream path;
753 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
754 tempstrings.push_back(path.str());
755 names.push_back((char*)tempstrings.back().c_str());
756 }
757 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
758 &types[0], NULL);
759 }
760
761 if (m_mpiInfo->size > 1) {
762 #ifdef ESYS_MPI
763 PMPIO_HandOffBaton(baton, dbfile);
764 PMPIO_Finish(baton);
765 #endif
766 } else {
767 DBClose(dbfile);
768 }
769
770 #else // USE_SILO
771 throw RipleyException("dump: no Silo support");
772 #endif
773 }
774
775 const int* Brick::borrowSampleReferenceIDs(int fsType) const
776 {
777 switch (fsType) {
778 case Nodes:
779 case ReducedNodes: //FIXME: reduced
780 return &m_nodeId[0];
781 case DegreesOfFreedom:
782 case ReducedDegreesOfFreedom: //FIXME: reduced
783 return &m_dofId[0];
784 case Elements:
785 case ReducedElements:
786 return &m_elementId[0];
787 case FaceElements:
788 case ReducedFaceElements:
789 return &m_faceId[0];
790 case Points:
791 return &m_diracPointNodeIDs[0];
792 default:
793 break;
794 }
795
796 stringstream msg;
797 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
798 throw RipleyException(msg.str());
799 }
800
801 bool Brick::ownSample(int fsType, index_t id) const
802 {
803 if (getMPISize()==1)
804 return true;
805
806 switch (fsType) {
807 case Nodes:
808 case ReducedNodes: //FIXME: reduced
809 return (m_dofMap[id] < getNumDOF());
810 case DegreesOfFreedom:
811 case ReducedDegreesOfFreedom:
812 return true;
813 case Elements:
814 case ReducedElements:
815 {
816 // check ownership of element's _last_ node
817 const index_t x=id%m_NE[0] + 1;
818 const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
819 const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
820 return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
821 }
822 case FaceElements:
823 case ReducedFaceElements:
824 {
825 // check ownership of face element's last node
826 dim_t n=0;
827 for (size_t i=0; i<6; i++) {
828 n+=m_faceCount[i];
829 if (id<n) {
830 const index_t j=id-n+m_faceCount[i];
831 if (i>=4) { // front or back
832 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
833 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
834 } else if (i>=2) { // bottom or top
835 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
836 return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
837 } else { // left or right
838 const index_t first=(i==0 ? 0 : m_NN[0]-1);
839 return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
840 }
841 }
842 }
843 return false;
844 }
845 default:
846 break;
847 }
848
849 stringstream msg;
850 msg << "ownSample: invalid function space type " << fsType;
851 throw RipleyException(msg.str());
852 }
853
854 void Brick::setToNormal(escript::Data& out) const
855 {
856 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
857 out.requireWrite();
858 #pragma omp parallel
859 {
860 if (m_faceOffset[0] > -1) {
861 #pragma omp for nowait
862 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
863 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
864 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
865 // set vector at four quadrature points
866 *o++ = -1.; *o++ = 0.; *o++ = 0.;
867 *o++ = -1.; *o++ = 0.; *o++ = 0.;
868 *o++ = -1.; *o++ = 0.; *o++ = 0.;
869 *o++ = -1.; *o++ = 0.; *o = 0.;
870 }
871 }
872 }
873
874 if (m_faceOffset[1] > -1) {
875 #pragma omp for nowait
876 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
877 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
878 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
879 // set vector at four quadrature points
880 *o++ = 1.; *o++ = 0.; *o++ = 0.;
881 *o++ = 1.; *o++ = 0.; *o++ = 0.;
882 *o++ = 1.; *o++ = 0.; *o++ = 0.;
883 *o++ = 1.; *o++ = 0.; *o = 0.;
884 }
885 }
886 }
887
888 if (m_faceOffset[2] > -1) {
889 #pragma omp for nowait
890 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
891 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
892 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
893 // set vector at four quadrature points
894 *o++ = 0.; *o++ = -1.; *o++ = 0.;
895 *o++ = 0.; *o++ = -1.; *o++ = 0.;
896 *o++ = 0.; *o++ = -1.; *o++ = 0.;
897 *o++ = 0.; *o++ = -1.; *o = 0.;
898 }
899 }
900 }
901
902 if (m_faceOffset[3] > -1) {
903 #pragma omp for nowait
904 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
905 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
906 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
907 // set vector at four quadrature points
908 *o++ = 0.; *o++ = 1.; *o++ = 0.;
909 *o++ = 0.; *o++ = 1.; *o++ = 0.;
910 *o++ = 0.; *o++ = 1.; *o++ = 0.;
911 *o++ = 0.; *o++ = 1.; *o = 0.;
912 }
913 }
914 }
915
916 if (m_faceOffset[4] > -1) {
917 #pragma omp for nowait
918 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
919 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
920 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
921 // set vector at four quadrature points
922 *o++ = 0.; *o++ = 0.; *o++ = -1.;
923 *o++ = 0.; *o++ = 0.; *o++ = -1.;
924 *o++ = 0.; *o++ = 0.; *o++ = -1.;
925 *o++ = 0.; *o++ = 0.; *o = -1.;
926 }
927 }
928 }
929
930 if (m_faceOffset[5] > -1) {
931 #pragma omp for nowait
932 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
933 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
934 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
935 // set vector at four quadrature points
936 *o++ = 0.; *o++ = 0.; *o++ = 1.;
937 *o++ = 0.; *o++ = 0.; *o++ = 1.;
938 *o++ = 0.; *o++ = 0.; *o++ = 1.;
939 *o++ = 0.; *o++ = 0.; *o = 1.;
940 }
941 }
942 }
943 } // end of parallel section
944 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
945 out.requireWrite();
946 #pragma omp parallel
947 {
948 if (m_faceOffset[0] > -1) {
949 #pragma omp for nowait
950 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
951 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
952 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
953 *o++ = -1.;
954 *o++ = 0.;
955 *o = 0.;
956 }
957 }
958 }
959
960 if (m_faceOffset[1] > -1) {
961 #pragma omp for nowait
962 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
963 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
964 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
965 *o++ = 1.;
966 *o++ = 0.;
967 *o = 0.;
968 }
969 }
970 }
971
972 if (m_faceOffset[2] > -1) {
973 #pragma omp for nowait
974 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
976 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
977 *o++ = 0.;
978 *o++ = -1.;
979 *o = 0.;
980 }
981 }
982 }
983
984 if (m_faceOffset[3] > -1) {
985 #pragma omp for nowait
986 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
987 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
988 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
989 *o++ = 0.;
990 *o++ = 1.;
991 *o = 0.;
992 }
993 }
994 }
995
996 if (m_faceOffset[4] > -1) {
997 #pragma omp for nowait
998 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
999 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1000 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1001 *o++ = 0.;
1002 *o++ = 0.;
1003 *o = -1.;
1004 }
1005 }
1006 }
1007
1008 if (m_faceOffset[5] > -1) {
1009 #pragma omp for nowait
1010 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1011 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1012 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1013 *o++ = 0.;
1014 *o++ = 0.;
1015 *o = 1.;
1016 }
1017 }
1018 }
1019 } // end of parallel section
1020
1021 } else {
1022 stringstream msg;
1023 msg << "setToNormal: invalid function space type "
1024 << out.getFunctionSpace().getTypeCode();
1025 throw RipleyException(msg.str());
1026 }
1027 }
1028
1029 void Brick::setToSize(escript::Data& out) const
1030 {
1031 if (out.getFunctionSpace().getTypeCode() == Elements
1032 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1033 out.requireWrite();
1034 const dim_t numQuad=out.getNumDataPointsPerSample();
1035 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1036 #pragma omp parallel for
1037 for (index_t k = 0; k < getNumElements(); ++k) {
1038 double* o = out.getSampleDataRW(k);
1039 fill(o, o+numQuad, size);
1040 }
1041 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1042 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1043 out.requireWrite();
1044 const dim_t numQuad=out.getNumDataPointsPerSample();
1045 #pragma omp parallel
1046 {
1047 if (m_faceOffset[0] > -1) {
1048 const double size=min(m_dx[1],m_dx[2]);
1049 #pragma omp for nowait
1050 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1051 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1052 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1053 fill(o, o+numQuad, size);
1054 }
1055 }
1056 }
1057
1058 if (m_faceOffset[1] > -1) {
1059 const double size=min(m_dx[1],m_dx[2]);
1060 #pragma omp for nowait
1061 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1062 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1063 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1064 fill(o, o+numQuad, size);
1065 }
1066 }
1067 }
1068
1069 if (m_faceOffset[2] > -1) {
1070 const double size=min(m_dx[0],m_dx[2]);
1071 #pragma omp for nowait
1072 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1073 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1074 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1075 fill(o, o+numQuad, size);
1076 }
1077 }
1078 }
1079
1080 if (m_faceOffset[3] > -1) {
1081 const double size=min(m_dx[0],m_dx[2]);
1082 #pragma omp for nowait
1083 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1084 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1085 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1086 fill(o, o+numQuad, size);
1087 }
1088 }
1089 }
1090
1091 if (m_faceOffset[4] > -1) {
1092 const double size=min(m_dx[0],m_dx[1]);
1093 #pragma omp for nowait
1094 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1095 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1096 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1097 fill(o, o+numQuad, size);
1098 }
1099 }
1100 }
1101
1102 if (m_faceOffset[5] > -1) {
1103 const double size=min(m_dx[0],m_dx[1]);
1104 #pragma omp for nowait
1105 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1106 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1107 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1108 fill(o, o+numQuad, size);
1109 }
1110 }
1111 }
1112 } // end of parallel section
1113
1114 } else {
1115 stringstream msg;
1116 msg << "setToSize: invalid function space type "
1117 << out.getFunctionSpace().getTypeCode();
1118 throw RipleyException(msg.str());
1119 }
1120 }
1121
1122 void Brick::Print_Mesh_Info(const bool full) const
1123 {
1124 RipleyDomain::Print_Mesh_Info(full);
1125 if (full) {
1126 cout << " Id Coordinates" << endl;
1127 cout.precision(15);
1128 cout.setf(ios::scientific, ios::floatfield);
1129 for (index_t i=0; i < getNumNodes(); i++) {
1130 cout << " " << setw(5) << m_nodeId[i]
1131 << " " << getLocalCoordinate(i%m_NN[0], 0)
1132 << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1133 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1134 }
1135 }
1136 }
1137
1138
1139 //protected
1140 void Brick::assembleCoordinates(escript::Data& arg) const
1141 {
1142 escriptDataC x = arg.getDataC();
1143 int numDim = m_numDim;
1144 if (!isDataPointShapeEqual(&x, 1, &numDim))
1145 throw RipleyException("setToX: Invalid Data object shape");
1146 if (!numSamplesEqual(&x, 1, getNumNodes()))
1147 throw RipleyException("setToX: Illegal number of samples in Data object");
1148
1149 arg.requireWrite();
1150 #pragma omp parallel for
1151 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1152 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1153 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1154 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1155 point[0] = getLocalCoordinate(i0, 0);
1156 point[1] = getLocalCoordinate(i1, 1);
1157 point[2] = getLocalCoordinate(i2, 2);
1158 }
1159 }
1160 }
1161 }
1162
1163 //protected
1164 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1165 {
1166 const dim_t numComp = in.getDataPointSize();
1167 const double C0 = .044658198738520451079;
1168 const double C1 = .16666666666666666667;
1169 const double C2 = .21132486540518711775;
1170 const double C3 = .25;
1171 const double C4 = .5;
1172 const double C5 = .62200846792814621559;
1173 const double C6 = .78867513459481288225;
1174
1175 if (out.getFunctionSpace().getTypeCode() == Elements) {
1176 out.requireWrite();
1177 #pragma omp parallel
1178 {
1179 vector<double> f_000(numComp);
1180 vector<double> f_001(numComp);
1181 vector<double> f_010(numComp);
1182 vector<double> f_011(numComp);
1183 vector<double> f_100(numComp);
1184 vector<double> f_101(numComp);
1185 vector<double> f_110(numComp);
1186 vector<double> f_111(numComp);
1187 #pragma omp for
1188 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1189 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1190 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1191 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1197 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1198 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1199 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1200 for (index_t i=0; i < numComp; ++i) {
1201 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1202 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1203 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1204 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1205 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1206 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1207 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1208 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1209 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1210 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1211 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1212 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1213 o[INDEX3(i,0,0,numComp,3)] = V0;
1214 o[INDEX3(i,1,0,numComp,3)] = V4;
1215 o[INDEX3(i,2,0,numComp,3)] = V8;
1216 o[INDEX3(i,0,1,numComp,3)] = V0;
1217 o[INDEX3(i,1,1,numComp,3)] = V5;
1218 o[INDEX3(i,2,1,numComp,3)] = V9;
1219 o[INDEX3(i,0,2,numComp,3)] = V1;
1220 o[INDEX3(i,1,2,numComp,3)] = V4;
1221 o[INDEX3(i,2,2,numComp,3)] = V10;
1222 o[INDEX3(i,0,3,numComp,3)] = V1;
1223 o[INDEX3(i,1,3,numComp,3)] = V5;
1224 o[INDEX3(i,2,3,numComp,3)] = V11;
1225 o[INDEX3(i,0,4,numComp,3)] = V2;
1226 o[INDEX3(i,1,4,numComp,3)] = V6;
1227 o[INDEX3(i,2,4,numComp,3)] = V8;
1228 o[INDEX3(i,0,5,numComp,3)] = V2;
1229 o[INDEX3(i,1,5,numComp,3)] = V7;
1230 o[INDEX3(i,2,5,numComp,3)] = V9;
1231 o[INDEX3(i,0,6,numComp,3)] = V3;
1232 o[INDEX3(i,1,6,numComp,3)] = V6;
1233 o[INDEX3(i,2,6,numComp,3)] = V10;
1234 o[INDEX3(i,0,7,numComp,3)] = V3;
1235 o[INDEX3(i,1,7,numComp,3)] = V7;
1236 o[INDEX3(i,2,7,numComp,3)] = V11;
1237 } // end of component loop i
1238 } // end of k0 loop
1239 } // end of k1 loop
1240 } // end of k2 loop
1241 } // end of parallel section
1242 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1243 out.requireWrite();
1244 #pragma omp parallel
1245 {
1246 vector<double> f_000(numComp);
1247 vector<double> f_001(numComp);
1248 vector<double> f_010(numComp);
1249 vector<double> f_011(numComp);
1250 vector<double> f_100(numComp);
1251 vector<double> f_101(numComp);
1252 vector<double> f_110(numComp);
1253 vector<double> f_111(numComp);
1254 #pragma omp for
1255 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1256 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1257 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1258 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1264 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1265 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1266 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1267 for (index_t i=0; i < numComp; ++i) {
1268 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1269 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1270 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1271 } // end of component loop i
1272 } // end of k0 loop
1273 } // end of k1 loop
1274 } // end of k2 loop
1275 } // end of parallel section
1276 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1277 out.requireWrite();
1278 #pragma omp parallel
1279 {
1280 vector<double> f_000(numComp);
1281 vector<double> f_001(numComp);
1282 vector<double> f_010(numComp);
1283 vector<double> f_011(numComp);
1284 vector<double> f_100(numComp);
1285 vector<double> f_101(numComp);
1286 vector<double> f_110(numComp);
1287 vector<double> f_111(numComp);
1288 if (m_faceOffset[0] > -1) {
1289 #pragma omp for nowait
1290 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1291 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1292 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1301 for (index_t i=0; i < numComp; ++i) {
1302 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1303 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1304 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1305 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1306 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307 o[INDEX3(i,1,0,numComp,3)] = V0;
1308 o[INDEX3(i,2,0,numComp,3)] = V2;
1309 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1310 o[INDEX3(i,1,1,numComp,3)] = V0;
1311 o[INDEX3(i,2,1,numComp,3)] = V3;
1312 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1313 o[INDEX3(i,1,2,numComp,3)] = V1;
1314 o[INDEX3(i,2,2,numComp,3)] = V2;
1315 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1316 o[INDEX3(i,1,3,numComp,3)] = V1;
1317 o[INDEX3(i,2,3,numComp,3)] = V3;
1318 } // end of component loop i
1319 } // end of k1 loop
1320 } // end of k2 loop
1321 } // end of face 0
1322 if (m_faceOffset[1] > -1) {
1323 #pragma omp for nowait
1324 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1325 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1326 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1332 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1333 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1335 for (index_t i=0; i < numComp; ++i) {
1336 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1337 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1338 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1339 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1340 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1341 o[INDEX3(i,1,0,numComp,3)] = V0;
1342 o[INDEX3(i,2,0,numComp,3)] = V2;
1343 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1344 o[INDEX3(i,1,1,numComp,3)] = V0;
1345 o[INDEX3(i,2,1,numComp,3)] = V3;
1346 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1347 o[INDEX3(i,1,2,numComp,3)] = V1;
1348 o[INDEX3(i,2,2,numComp,3)] = V2;
1349 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1350 o[INDEX3(i,1,3,numComp,3)] = V1;
1351 o[INDEX3(i,2,3,numComp,3)] = V3;
1352 } // end of component loop i
1353 } // end of k1 loop
1354 } // end of k2 loop
1355 } // end of face 1
1356 if (m_faceOffset[2] > -1) {
1357 #pragma omp for nowait
1358 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1359 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1360 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1369 for (index_t i=0; i < numComp; ++i) {
1370 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1371 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1372 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1373 o[INDEX3(i,0,0,numComp,3)] = V0;
1374 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1375 o[INDEX3(i,2,0,numComp,3)] = V1;
1376 o[INDEX3(i,0,1,numComp,3)] = V0;
1377 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1378 o[INDEX3(i,2,1,numComp,3)] = V2;
1379 o[INDEX3(i,0,2,numComp,3)] = V0;
1380 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1381 o[INDEX3(i,2,2,numComp,3)] = V1;
1382 o[INDEX3(i,0,3,numComp,3)] = V0;
1383 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1384 o[INDEX3(i,2,3,numComp,3)] = V2;
1385 } // end of component loop i
1386 } // end of k0 loop
1387 } // end of k2 loop
1388 } // end of face 2
1389 if (m_faceOffset[3] > -1) {
1390 #pragma omp for nowait
1391 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1392 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1393 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1402 for (index_t i=0; i < numComp; ++i) {
1403 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1404 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1405 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1406 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1407 o[INDEX3(i,0,0,numComp,3)] = V0;
1408 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1409 o[INDEX3(i,2,0,numComp,3)] = V2;
1410 o[INDEX3(i,0,1,numComp,3)] = V0;
1411 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1412 o[INDEX3(i,2,1,numComp,3)] = V3;
1413 o[INDEX3(i,0,2,numComp,3)] = V1;
1414 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1415 o[INDEX3(i,2,2,numComp,3)] = V2;
1416 o[INDEX3(i,0,3,numComp,3)] = V1;
1417 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1418 o[INDEX3(i,2,3,numComp,3)] = V3;
1419 } // end of component loop i
1420 } // end of k0 loop
1421 } // end of k2 loop
1422 } // end of face 3
1423 if (m_faceOffset[4] > -1) {
1424 #pragma omp for nowait
1425 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1426 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1427 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1436 for (index_t i=0; i < numComp; ++i) {
1437 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1438 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1439 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1440 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1441 o[INDEX3(i,0,0,numComp,3)] = V0;
1442 o[INDEX3(i,1,0,numComp,3)] = V2;
1443 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1444 o[INDEX3(i,0,1,numComp,3)] = V0;
1445 o[INDEX3(i,1,1,numComp,3)] = V3;
1446 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1447 o[INDEX3(i,0,2,numComp,3)] = V1;
1448 o[INDEX3(i,1,2,numComp,3)] = V2;
1449 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1450 o[INDEX3(i,0,3,numComp,3)] = V1;
1451 o[INDEX3(i,1,3,numComp,3)] = V3;
1452 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1453 } // end of component loop i
1454 } // end of k0 loop
1455 } // end of k1 loop
1456 } // end of face 4
1457 if (m_faceOffset[5] > -1) {
1458 #pragma omp for nowait
1459 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1460 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1461 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1470 for (index_t i=0; i < numComp; ++i) {
1471 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1472 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1473 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1474 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1475 o[INDEX3(i,0,0,numComp,3)] = V0;
1476 o[INDEX3(i,1,0,numComp,3)] = V2;
1477 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1478 o[INDEX3(i,0,1,numComp,3)] = V0;
1479 o[INDEX3(i,1,1,numComp,3)] = V3;
1480 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1481 o[INDEX3(i,0,2,numComp,3)] = V1;
1482 o[INDEX3(i,1,2,numComp,3)] = V2;
1483 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1484 o[INDEX3(i,0,3,numComp,3)] = V1;
1485 o[INDEX3(i,1,3,numComp,3)] = V3;
1486 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1487 } // end of component loop i
1488 } // end of k0 loop
1489 } // end of k1 loop
1490 } // end of face 5
1491 } // end of parallel section
1492 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1493 out.requireWrite();
1494 #pragma omp parallel
1495 {
1496 vector<double> f_000(numComp);
1497 vector<double> f_001(numComp);
1498 vector<double> f_010(numComp);
1499 vector<double> f_011(numComp);
1500 vector<double> f_100(numComp);
1501 vector<double> f_101(numComp);
1502 vector<double> f_110(numComp);
1503 vector<double> f_111(numComp);
1504 if (m_faceOffset[0] > -1) {
1505 #pragma omp for nowait
1506 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1507 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1508 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1517 for (index_t i=0; i < numComp; ++i) {
1518 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1519 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1520 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1521 } // end of component loop i
1522 } // end of k1 loop
1523 } // end of k2 loop
1524 } // end of face 0
1525 if (m_faceOffset[1] > -1) {
1526 #pragma omp for nowait
1527 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1528 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1529 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1538 for (index_t i=0; i < numComp; ++i) {
1539 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1540 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1541 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1542 } // end of component loop i
1543 } // end of k1 loop
1544 } // end of k2 loop
1545 } // end of face 1
1546 if (m_faceOffset[2] > -1) {
1547 #pragma omp for nowait
1548 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1549 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1550 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1551 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1552 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1553 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1554 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1555 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1556 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1557 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1558 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1559 for (index_t i=0; i < numComp; ++i) {
1560 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / m_dx[0];
1561 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1562 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / m_dx[2];
1563 } // end of component loop i
1564 } // end of k0 loop
1565 } // end of k2 loop
1566 } // end of face 2
1567 if (m_faceOffset[3] > -1) {
1568 #pragma omp for nowait
1569 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1570 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1571 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1580 for (index_t i=0; i < numComp; ++i) {
1581 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / m_dx[0];
1582 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1583 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / m_dx[2];
1584 } // end of component loop i
1585 } // end of k0 loop
1586 } // end of k2 loop
1587 } // end of face 3
1588 if (m_faceOffset[4] > -1) {
1589 #pragma omp for nowait
1590 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1591 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1592 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1593 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1594 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1595 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1596 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1597 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1598 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1599 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1600 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1601 for (index_t i=0; i < numComp; ++i) {
1602 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / m_dx[0];
1603 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / m_dx[1];
1604 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1605 } // end of component loop i
1606 } // end of k0 loop
1607 } // end of k1 loop
1608 } // end of face 4
1609 if (m_faceOffset[5] > -1) {
1610 #pragma omp for nowait
1611 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1612 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1613 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614 memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615 memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1617 memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1618 memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1619 memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1620 memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1621 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1622 for (index_t i=0; i < numComp; ++i) {
1623 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / m_dx[0];
1624 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / m_dx[1];
1625 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1626 } // end of component loop i
1627 } // end of k0 loop
1628 } // end of k1 loop
1629 } // end of face 5
1630 } // end of parallel section
1631 }
1632 }
1633
1634 //protected
1635 void Brick::assembleIntegrate(vector<double>& integrals, const escript::Data& arg) const
1636 {
1637 const dim_t numComp = arg.getDataPointSize();
1638 const index_t left = (m_offset[0]==0 ? 0 : 1);
1639 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1640 const index_t front = (m_offset[2]==0 ? 0 : 1);
1641 const int fs = arg.getFunctionSpace().getTypeCode();
1642 if (fs == Elements && arg.actsExpanded()) {
1643 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2]/8.;
1644 #pragma omp parallel
1645 {
1646 vector<double> int_local(numComp, 0);
1647 #pragma omp for nowait
1648 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1649 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1650 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1651 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1652 for (index_t i=0; i < numComp; ++i) {
1653 const double f_0 = f[INDEX2(i,0,numComp)];
1654 const double f_1 = f[INDEX2(i,1,numComp)];
1655 const double f_2 = f[INDEX2(i,2,numComp)];
1656 const double f_3 = f[INDEX2(i,3,numComp)];
1657 const double f_4 = f[INDEX2(i,4,numComp)];
1658 const double f_5 = f[INDEX2(i,5,numComp)];
1659 const double f_6 = f[INDEX2(i,6,numComp)];
1660 const double f_7 = f[INDEX2(i,7,numComp)];
1661 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1662 } // end of component loop i
1663 } // end of k0 loop
1664 } // end of k1 loop
1665 } // end of k2 loop
1666
1667 #pragma omp critical
1668 for (index_t i=0; i<numComp; i++)
1669 integrals[i]+=int_local[i];
1670 } // end of parallel section
1671
1672 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1673 const double w_0 = m_dx[0]*m_dx[1]*m_dx[2];
1674 #pragma omp parallel
1675 {
1676 vector<double> int_local(numComp, 0);
1677 #pragma omp for nowait
1678 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1679 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1680 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1681 const double* f = arg.getSampleDataRO(INDEX3(k0, k1, k2, m_NE[0], m_NE[1]));
1682 for (index_t i=0; i < numComp; ++i) {
1683 int_local[i]+=f[i]*w_0;
1684 } // end of component loop i
1685 } // end of k0 loop
1686 } // end of k1 loop
1687 } // end of k2 loop
1688
1689 #pragma omp critical
1690 for (index_t i=0; i<numComp; i++)
1691 integrals[i]+=int_local[i];
1692 } // end of parallel section
1693
1694 } else if (fs == FaceElements && arg.actsExpanded()) {
1695 const double w_0 = m_dx[1]*m_dx[2]/4.;
1696 const double w_1 = m_dx[0]*m_dx[2]/4.;
1697 const double w_2 = m_dx[0]*m_dx[1]/4.;
1698 #pragma omp parallel
1699 {
1700 vector<double> int_local(numComp, 0);
1701 if (m_faceOffset[0] > -1) {
1702 #pragma omp for nowait
1703 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1704 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1705 const double* f = arg.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1706 for (index_t i=0; i < numComp; ++i) {
1707 const double f_0 = f[INDEX2(i,0,numComp)];
1708 const double f_1 = f[INDEX2(i,1,numComp)];
1709 const double f_2 = f[INDEX2(i,2,numComp)];
1710 const double f_3 = f[INDEX2(i,3,numComp)];
1711 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1712 } // end of component loop i
1713 } // end of k1 loop
1714 } // end of k2 loop
1715 }
1716
1717 if (m_faceOffset[1] > -1) {
1718 #pragma omp for nowait
1719 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1720 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1721 const double* f = arg.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1722 for (index_t i=0; i < numComp; ++i) {
1723 const double f_0 = f[INDEX2(i,0,numComp)];
1724 const double f_1 = f[INDEX2(i,1,numComp)];
1725 const double f_2 = f[INDEX2(i,2,numComp)];
1726 const double f_3 = f[INDEX2(i,3,numComp)];
1727 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1728 } // end of component loop i
1729 } // end of k1 loop
1730 } // end of k2 loop
1731 }
1732
1733 if (m_faceOffset[2] > -1) {
1734 #pragma omp for nowait
1735 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1736 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1737 const double* f = arg.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1738 for (index_t i=0; i < numComp; ++i) {
1739 const double f_0 = f[INDEX2(i,0,numComp)];
1740 const double f_1 = f[INDEX2(i,1,numComp)];
1741 const double f_2 = f[INDEX2(i,2,numComp)];
1742 const double f_3 = f[INDEX2(i,3,numComp)];
1743 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1744 } // end of component loop i
1745 } // end of k1 loop
1746 } // end of k2 loop
1747 }
1748
1749 if (m_faceOffset[3] > -1) {
1750 #pragma omp for nowait
1751 for (index_t k2 = front; k2 < front+m_ownNE[2]; ++k2) {
1752 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1753 const double* f = arg.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1754 for (index_t i=0; i < numComp; ++i) {
1755 const double f_0 = f[INDEX2(i,0,numComp)];
1756 const double f_1 = f[INDEX2(i,1,numComp)];
1757 const double f_2 = f[INDEX2(i,2,numComp)];
1758 const double f_3 = f[INDEX2(i,3,numComp)];
1759 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1760 } // end of component loop i
1761 } // end of k1 loop
1762 } // end of k2 loop
1763 }
1764
1765 if (m_faceOffset[4] > -1) {
1766 #pragma omp for nowait
1767 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1768 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1769 const double* f = arg.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1770 for (index_t i=0; i < numComp; ++i) {
1771 const double f_0 = f[INDEX2(i,0,numComp)];
1772 const double f_1 = f[INDEX2(i,1,numComp)];
1773 const double f_2 = f[INDEX2(i,2,numComp)];
1774 const double f_3 = f[INDEX2(i,3,numComp)];
1775 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1776 } // end of component loop i
1777 } // end of k1 loop
1778 } // end of k2 loop
1779 }
1780
1781 if (m_faceOffset[5] > -1) {
1782 #pragma omp for nowait
1783 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1784 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1785 const double* f = arg.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1786 for (index_t i=0; i < numComp; ++i) {
1787 const double f_0 = f[INDEX2(i,0,numComp)];
1788 const double f_1 = f[INDEX2(i,1,numComp)];
1789 const double f_2 = f[INDEX2(i,2,numComp)];
1790 const double f_3 = f[INDEX2(i,3,numComp)];
1791 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1792 } // end of component loop i
1793 } // end of k1 loop
1794 } // end of k2 loop
1795 }
1796
1797 #pragma omp critical
1798 for (index_t i=0; i<numComp; i++)
1799 integrals[i]+=int_local[i];
1800 } // end of parallel section
1801
1802 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded()))