/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5084 - (hide annotations)
Sun Jun 29 23:29:51 2014 UTC (4 years, 8 months ago) by caltinay
File size: 165487 byte(s)
Fast forward to latest trunk which has had an impressive number of changes...

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
19 sshaw 4629 #include <ripley/WaveAssembler3D.h>
20 sshaw 4712 #include <ripley/LameAssembler3D.h>
21 caltinay 5084 #include <ripley/blocktools.h>
22 sshaw 4712 #include <ripley/domainhelpers.h>
23 caltinay 5084 #include <esysUtils/esysFileWriter.h>
24     #include <esysUtils/EsysRandom.h>
25     #include <paso/SystemMatrix.h>
26    
27 caltinay 4477 #include <boost/scoped_array.hpp>
28    
29 caltinay 4013 #ifdef USE_NETCDF
30     #include <netcdfcpp.h>
31     #endif
32    
33 caltinay 3691 #if USE_SILO
34     #include <silo.h>
35     #ifdef ESYS_MPI
36     #include <pmpio.h>
37     #endif
38     #endif
39    
40     #include <iomanip>
41 caltinay 5084 #include <limits>
42 caltinay 3691
43 jfenwick 4650
44 caltinay 3691 using namespace std;
45 caltinay 4334 using esysUtils::FileWriter;
46 caltinay 3691
47     namespace ripley {
48    
49 sshaw 4712 int indexOfMax(int a, int b, int c) {
50     if (a > b) {
51     if (c > a) {
52     return 2;
53     }
54     return 0;
55     } else if (b > c) {
56     return 1;
57     }
58     return 2;
59     }
60    
61 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
62 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
63     const std::vector<double>& points, const std::vector<int>& tags,
64 jfenwick 4934 const simap_t& tagnamestonums,
65     escript::SubWorld_ptr w) :
66     RipleyDomain(3, w)
67 caltinay 3691 {
68 sshaw 4861 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
69     * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
70     throw RipleyException("The number of elements has overflowed, this "
71     "limit may be raised in future releases.");
72    
73 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
74     throw RipleyException("Number of elements in each spatial dimension "
75     "must be positive");
76    
77 caltinay 3943 // ignore subdivision parameters for serial run
78     if (m_mpiInfo->size == 1) {
79     d0=1;
80     d1=1;
81     d2=1;
82     }
83     bool warn=false;
84 sshaw 4712
85     std::vector<int> factors;
86     int ranks = m_mpiInfo->size;
87     int epr[3] = {n0,n1,n2};
88     int d[3] = {d0,d1,d2};
89     if (d0<=0 || d1<=0 || d2<=0) {
90     for (int i = 0; i < 3; i++) {
91     if (d[i] < 1) {
92     d[i] = 1;
93     continue;
94 caltinay 3943 }
95 sshaw 4712 epr[i] = -1; // can no longer be max
96     if (ranks % d[i] != 0) {
97     throw RipleyException("Invalid number of spatial subdivisions");
98 caltinay 3943 }
99 sshaw 4712 //remove
100     ranks /= d[i];
101 caltinay 3943 }
102 sshaw 4712 factorise(factors, ranks);
103     if (factors.size() != 0) {
104     warn = true;
105 caltinay 3943 }
106 sshaw 4712 }
107     while (factors.size() > 0) {
108     int i = indexOfMax(epr[0],epr[1],epr[2]);
109     int f = factors.back();
110     factors.pop_back();
111     d[i] *= f;
112     epr[i] /= f;
113 caltinay 3943 }
114 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
115 caltinay 3943
116 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
117     // among number of ranks
118 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
119 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
120 sshaw 4712 }
121 caltinay 3943 if (warn) {
122     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
123     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
124     }
125 caltinay 3691
126 caltinay 4334 double l0 = x1-x0;
127     double l1 = y1-y0;
128     double l2 = z1-z0;
129     m_dx[0] = l0/n0;
130     m_dx[1] = l1/n1;
131     m_dx[2] = l2/n2;
132    
133     if ((n0+1)%d0 > 0) {
134 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
135 caltinay 4334 l0=m_dx[0]*n0;
136 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
137 caltinay 4334 << n0 << ", l0=" << l0 << endl;
138 caltinay 3943 }
139 caltinay 4334 if ((n1+1)%d1 > 0) {
140 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
141 caltinay 4334 l1=m_dx[1]*n1;
142 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
143 caltinay 4334 << n1 << ", l1=" << l1 << endl;
144 caltinay 3943 }
145 caltinay 4334 if ((n2+1)%d2 > 0) {
146 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
147 caltinay 4334 l2=m_dx[2]*n2;
148 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
149 caltinay 4334 << n2 << ", l2=" << l2 << endl;
150 caltinay 3943 }
151    
152 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
153 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
154    
155 caltinay 4334 m_gNE[0] = n0;
156     m_gNE[1] = n1;
157     m_gNE[2] = n2;
158     m_origin[0] = x0;
159     m_origin[1] = y0;
160     m_origin[2] = z0;
161     m_length[0] = l0;
162     m_length[1] = l1;
163     m_length[2] = l2;
164     m_NX[0] = d0;
165     m_NX[1] = d1;
166     m_NX[2] = d2;
167    
168 caltinay 3753 // local number of elements (including overlap)
169 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
170     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
171     m_NE[0]++;
172     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
173     m_ownNE[0]--;
174 caltinay 3764
175 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
176     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
177     m_NE[1]++;
178     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
179     m_ownNE[1]--;
180 caltinay 3764
181 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
182     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
183     m_NE[2]++;
184     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
185     m_ownNE[2]--;
186 caltinay 3753
187     // local number of nodes
188 caltinay 4334 m_NN[0] = m_NE[0]+1;
189     m_NN[1] = m_NE[1]+1;
190     m_NN[2] = m_NE[2]+1;
191 caltinay 3753
192 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
193 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
194     if (m_offset[0] > 0)
195     m_offset[0]--;
196     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
197     if (m_offset[1] > 0)
198     m_offset[1]--;
199     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
200     if (m_offset[2] > 0)
201     m_offset[2]--;
202 caltinay 3753
203 caltinay 3691 populateSampleIds();
204 caltinay 3756 createPattern();
205 sshaw 4622
206 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
207     i != tagnamestonums.end(); i++) {
208     setTagMap(i->first, i->second);
209     }
210 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
211 caltinay 3691 }
212    
213    
214     Brick::~Brick()
215     {
216     }
217    
218     string Brick::getDescription() const
219     {
220     return "ripley::Brick";
221     }
222    
223     bool Brick::operator==(const AbstractDomain& other) const
224     {
225 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
226     if (o) {
227     return (RipleyDomain::operator==(other) &&
228 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
229     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
230     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
231     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
232 caltinay 3744 }
233 caltinay 3691
234     return false;
235     }
236    
237 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
238 caltinay 4618 const ReaderParameters& params) const
239 caltinay 4013 {
240     #ifdef USE_NETCDF
241     // check destination function space
242     int myN0, myN1, myN2;
243     if (out.getFunctionSpace().getTypeCode() == Nodes) {
244 caltinay 4334 myN0 = m_NN[0];
245     myN1 = m_NN[1];
246     myN2 = m_NN[2];
247 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
248     out.getFunctionSpace().getTypeCode() == ReducedElements) {
249 caltinay 4334 myN0 = m_NE[0];
250     myN1 = m_NE[1];
251     myN2 = m_NE[2];
252 caltinay 4013 } else
253     throw RipleyException("readNcGrid(): invalid function space for output data object");
254    
255 caltinay 4615 if (params.first.size() != 3)
256 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
257    
258 caltinay 4615 if (params.numValues.size() != 3)
259 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
260    
261 caltinay 4615 if (params.multiplier.size() != 3)
262 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
263 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
264     if (params.multiplier[i]<1)
265 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
266    
267 caltinay 4013 // check file existence and size
268     NcFile f(filename.c_str(), NcFile::ReadOnly);
269     if (!f.is_valid())
270     throw RipleyException("readNcGrid(): cannot open file");
271    
272     NcVar* var = f.get_var(varname.c_str());
273     if (!var)
274     throw RipleyException("readNcGrid(): invalid variable name");
275    
276     // TODO: rank>0 data support
277     const int numComp = out.getDataPointSize();
278     if (numComp > 1)
279     throw RipleyException("readNcGrid(): only scalar data supported");
280    
281     const int dims = var->num_dims();
282 caltinay 4477 boost::scoped_array<long> edges(var->edges());
283 caltinay 4013
284     // is this a slice of the data object (dims!=3)?
285     // note the expected ordering of edges (as in numpy: z,y,x)
286 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
287     params.numValues[1] > edges[1] ||
288     params.numValues[0] > edges[2]))
289     || (dims==2 && params.numValues[2]>1)
290     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
291 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
292     }
293    
294     // check if this rank contributes anything
295 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
296     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
297     params.first[1] >= m_offset[1]+myN1 ||
298     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
299     params.first[2] >= m_offset[2]+myN2 ||
300     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
301 caltinay 4013 return;
302     }
303    
304     // now determine how much this rank has to write
305    
306     // first coordinates in data object to write to
307 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
308     const int first1 = max(0, params.first[1]-m_offset[1]);
309     const int first2 = max(0, params.first[2]-m_offset[2]);
310 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
311     int idx0 = max(0, m_offset[0]-params.first[0]);
312     int idx1 = max(0, m_offset[1]-params.first[1]);
313     int idx2 = max(0, m_offset[2]-params.first[2]);
314 caltinay 4277 // number of values to read
315 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
316     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
317     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
318 caltinay 4013
319 caltinay 4618 // make sure we read the right block if going backwards through file
320     if (params.reverse[0])
321     idx0 = edges[dims-1]-num0-idx0;
322     if (dims>1 && params.reverse[1])
323     idx1 = edges[dims-2]-num1-idx1;
324     if (dims>2 && params.reverse[2])
325     idx2 = edges[dims-3]-num2-idx2;
326    
327    
328 caltinay 4013 vector<double> values(num0*num1*num2);
329     if (dims==3) {
330     var->set_cur(idx2, idx1, idx0);
331     var->get(&values[0], num2, num1, num0);
332     } else if (dims==2) {
333     var->set_cur(idx1, idx0);
334     var->get(&values[0], num1, num0);
335     } else {
336     var->set_cur(idx0);
337     var->get(&values[0], num0);
338     }
339    
340     const int dpp = out.getNumDataPointsPerSample();
341     out.requireWrite();
342    
343 caltinay 4618 // helpers for reversing
344     const int x0 = (params.reverse[0] ? num0-1 : 0);
345     const int x_mult = (params.reverse[0] ? -1 : 1);
346     const int y0 = (params.reverse[1] ? num1-1 : 0);
347     const int y_mult = (params.reverse[1] ? -1 : 1);
348     const int z0 = (params.reverse[2] ? num2-1 : 0);
349     const int z_mult = (params.reverse[2] ? -1 : 1);
350    
351 caltinay 4013 for (index_t z=0; z<num2; z++) {
352     for (index_t y=0; y<num1; y++) {
353     #pragma omp parallel for
354     for (index_t x=0; x<num0; x++) {
355 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
356     +(first1+y*params.multiplier[1])*myN0
357     +(first2+z*params.multiplier[2])*myN0*myN1;
358 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
359     +(y0+y_mult*y)*num0
360     +(x0+x_mult*x);
361 caltinay 4174 if (!isnan(values[srcIndex])) {
362 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
363     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
364     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
365 caltinay 4277 const int dataIndex = baseIndex+m0
366     +m1*myN0
367     +m2*myN0*myN1;
368     double* dest = out.getSampleDataRW(dataIndex);
369     for (index_t q=0; q<dpp; q++) {
370     *dest++ = values[srcIndex];
371     }
372     }
373     }
374 caltinay 4174 }
375 caltinay 4013 }
376     }
377     }
378     }
379     #else
380     throw RipleyException("readNcGrid(): not compiled with netCDF support");
381     #endif
382     }
383    
384 sshaw 4738 #ifdef USE_BOOSTIO
385     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
386     const ReaderParameters& params) const
387     {
388     // the mapping is not universally correct but should work on our
389     // supported platforms
390     switch (params.dataType) {
391     case DATATYPE_INT32:
392     readBinaryGridZippedImpl<int>(out, filename, params);
393     break;
394     case DATATYPE_FLOAT32:
395     readBinaryGridZippedImpl<float>(out, filename, params);
396     break;
397     case DATATYPE_FLOAT64:
398     readBinaryGridZippedImpl<double>(out, filename, params);
399     break;
400     default:
401     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
402     }
403     }
404     #endif
405    
406 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
407 caltinay 4618 const ReaderParameters& params) const
408 caltinay 4334 {
409 caltinay 4495 // the mapping is not universally correct but should work on our
410     // supported platforms
411 caltinay 4615 switch (params.dataType) {
412 caltinay 4495 case DATATYPE_INT32:
413 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
414 caltinay 4495 break;
415     case DATATYPE_FLOAT32:
416 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
417 caltinay 4495 break;
418     case DATATYPE_FLOAT64:
419 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
420 caltinay 4495 break;
421     default:
422     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
423     }
424     }
425    
426     template<typename ValueType>
427     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
428 caltinay 4618 const ReaderParameters& params) const
429 caltinay 4495 {
430 caltinay 4334 // check destination function space
431     int myN0, myN1, myN2;
432     if (out.getFunctionSpace().getTypeCode() == Nodes) {
433     myN0 = m_NN[0];
434     myN1 = m_NN[1];
435     myN2 = m_NN[2];
436     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
437     out.getFunctionSpace().getTypeCode() == ReducedElements) {
438     myN0 = m_NE[0];
439     myN1 = m_NE[1];
440     myN2 = m_NE[2];
441     } else
442     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
443    
444 caltinay 4615 if (params.first.size() != 3)
445 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
446    
447 caltinay 4615 if (params.numValues.size() != 3)
448 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
449    
450 caltinay 4615 if (params.multiplier.size() != 3)
451 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
452 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
453     if (params.multiplier[i]<1)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
455 caltinay 4988 if (params.reverse[0] != 0 || params.reverse[1] != 0)
456     throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
457 caltinay 4334
458     // check file existence and size
459     ifstream f(filename.c_str(), ifstream::binary);
460     if (f.fail()) {
461     throw RipleyException("readBinaryGrid(): cannot open file");
462     }
463     f.seekg(0, ios::end);
464     const int numComp = out.getDataPointSize();
465     const int filesize = f.tellg();
466 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 caltinay 4334 if (filesize < reqsize) {
468     f.close();
469     throw RipleyException("readBinaryGrid(): not enough data in file");
470     }
471    
472     // check if this rank contributes anything
473 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
474     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475     params.first[1] >= m_offset[1]+myN1 ||
476     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477     params.first[2] >= m_offset[2]+myN2 ||
478     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 caltinay 4334 f.close();
480     return;
481     }
482    
483     // now determine how much this rank has to write
484    
485     // first coordinates in data object to write to
486 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
487     const int first1 = max(0, params.first[1]-m_offset[1]);
488     const int first2 = max(0, params.first[2]-m_offset[2]);
489 caltinay 4988 // indices to first value in file (not accounting for reverse yet)
490     int idx0 = max(0, m_offset[0]-params.first[0]);
491     int idx1 = max(0, m_offset[1]-params.first[1]);
492     int idx2 = max(0, m_offset[2]-params.first[2]);
493 caltinay 4334 // number of values to read
494 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
495     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
496     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
497 caltinay 4334
498 caltinay 4988 // make sure we read the right block if going backwards through file
499     if (params.reverse[2])
500     idx2 = params.numValues[2]-idx2-1;
501    
502     // helpers for reversing
503     const int z_mult = (params.reverse[2] ? -1 : 1);
504    
505 caltinay 4334 out.requireWrite();
506 caltinay 4495 vector<ValueType> values(num0*numComp);
507 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
508    
509 caltinay 4529 for (int z=0; z<num2; z++) {
510     for (int y=0; y<num1; y++) {
511 caltinay 4988 const int fileofs = numComp*(idx0 +
512     (idx1+y)*params.numValues[0] +
513     (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
514 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
515     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
516 caltinay 4334
517 caltinay 4529 for (int x=0; x<num0; x++) {
518 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
519     +(first1+y*params.multiplier[1])*myN0
520     +(first2+z*params.multiplier[2])*myN0*myN1;
521     for (int m2=0; m2<params.multiplier[2]; m2++) {
522     for (int m1=0; m1<params.multiplier[1]; m1++) {
523     for (int m0=0; m0<params.multiplier[0]; m0++) {
524 caltinay 4334 const int dataIndex = baseIndex+m0
525     +m1*myN0
526     +m2*myN0*myN1;
527     double* dest = out.getSampleDataRW(dataIndex);
528 caltinay 4529 for (int c=0; c<numComp; c++) {
529     ValueType val = values[x*numComp+c];
530    
531 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
532 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
533     // this will alter val!!
534 caltinay 5084 if (sizeof(ValueType)>4) {
535     byte_swap64(cval);
536     } else {
537     byte_swap32(cval);
538     }
539 caltinay 4529 }
540 caltinay 5084 if (!isnan(val)) {
541 caltinay 4529 for (int q=0; q<dpp; q++) {
542     *dest++ = static_cast<double>(val);
543 caltinay 4334 }
544     }
545     }
546     }
547     }
548     }
549     }
550     }
551     }
552    
553     f.close();
554     }
555    
556 sshaw 4738 #ifdef USE_BOOSTIO
557     template<typename ValueType>
558     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
559     const ReaderParameters& params) const
560     {
561     // check destination function space
562     int myN0, myN1, myN2;
563     if (out.getFunctionSpace().getTypeCode() == Nodes) {
564     myN0 = m_NN[0];
565     myN1 = m_NN[1];
566     myN2 = m_NN[2];
567     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
568     out.getFunctionSpace().getTypeCode() == ReducedElements) {
569     myN0 = m_NE[0];
570     myN1 = m_NE[1];
571     myN2 = m_NE[2];
572     } else
573     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
574    
575     if (params.first.size() != 3)
576     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
577    
578     if (params.numValues.size() != 3)
579     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
580    
581     if (params.multiplier.size() != 3)
582     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
583     for (size_t i=0; i<params.multiplier.size(); i++)
584     if (params.multiplier[i]<1)
585     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
586    
587     // check file existence and size
588     ifstream f(filename.c_str(), ifstream::binary);
589     if (f.fail()) {
590     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
591     }
592     f.seekg(0, ios::end);
593     const int numComp = out.getDataPointSize();
594     int filesize = f.tellg();
595     f.seekg(0, ios::beg);
596     std::vector<char> compressed(filesize);
597     f.read((char*)&compressed[0], filesize);
598     f.close();
599     std::vector<char> decompressed = unzip(compressed);
600     filesize = decompressed.size();
601     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
602     if (filesize < reqsize) {
603     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
604     }
605    
606     // check if this rank contributes anything
607     if (params.first[0] >= m_offset[0]+myN0 ||
608     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
609     params.first[1] >= m_offset[1]+myN1 ||
610     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
611     params.first[2] >= m_offset[2]+myN2 ||
612     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
613     return;
614     }
615    
616     // now determine how much this rank has to write
617    
618     // first coordinates in data object to write to
619     const int first0 = max(0, params.first[0]-m_offset[0]);
620     const int first1 = max(0, params.first[1]-m_offset[1]);
621     const int first2 = max(0, params.first[2]-m_offset[2]);
622     // indices to first value in file
623     const int idx0 = max(0, m_offset[0]-params.first[0]);
624     const int idx1 = max(0, m_offset[1]-params.first[1]);
625     const int idx2 = max(0, m_offset[2]-params.first[2]);
626     // number of values to read
627     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
628     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
629     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
630    
631     out.requireWrite();
632     vector<ValueType> values(num0*numComp);
633     const int dpp = out.getNumDataPointsPerSample();
634    
635     for (int z=0; z<num2; z++) {
636     for (int y=0; y<num1; y++) {
637     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
638     +(idx2+z)*params.numValues[0]*params.numValues[1]);
639     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
640    
641     for (int x=0; x<num0; x++) {
642     const int baseIndex = first0+x*params.multiplier[0]
643     +(first1+y*params.multiplier[1])*myN0
644     +(first2+z*params.multiplier[2])*myN0*myN1;
645     for (int m2=0; m2<params.multiplier[2]; m2++) {
646     for (int m1=0; m1<params.multiplier[1]; m1++) {
647     for (int m0=0; m0<params.multiplier[0]; m0++) {
648     const int dataIndex = baseIndex+m0
649     +m1*myN0
650     +m2*myN0*myN1;
651     double* dest = out.getSampleDataRW(dataIndex);
652     for (int c=0; c<numComp; c++) {
653     ValueType val = values[x*numComp+c];
654    
655     if (params.byteOrder != BYTEORDER_NATIVE) {
656     char* cval = reinterpret_cast<char*>(&val);
657     // this will alter val!!
658     byte_swap32(cval);
659     }
660 caltinay 5084 if (!isnan(val)) {
661 sshaw 4738 for (int q=0; q<dpp; q++) {
662     *dest++ = static_cast<double>(val);
663     }
664     }
665     }
666     }
667     }
668     }
669     }
670     }
671     }
672     }
673     #endif
674    
675 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
676     int byteOrder, int dataType) const
677 caltinay 4334 {
678 caltinay 4357 // the mapping is not universally correct but should work on our
679     // supported platforms
680     switch (dataType) {
681     case DATATYPE_INT32:
682     writeBinaryGridImpl<int>(in, filename, byteOrder);
683     break;
684     case DATATYPE_FLOAT32:
685     writeBinaryGridImpl<float>(in, filename, byteOrder);
686     break;
687     case DATATYPE_FLOAT64:
688     writeBinaryGridImpl<double>(in, filename, byteOrder);
689     break;
690     default:
691     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
692     }
693     }
694    
695     template<typename ValueType>
696     void Brick::writeBinaryGridImpl(const escript::Data& in,
697     const string& filename, int byteOrder) const
698     {
699 caltinay 4334 // check function space and determine number of points
700     int myN0, myN1, myN2;
701     int totalN0, totalN1, totalN2;
702     if (in.getFunctionSpace().getTypeCode() == Nodes) {
703     myN0 = m_NN[0];
704     myN1 = m_NN[1];
705     myN2 = m_NN[2];
706     totalN0 = m_gNE[0]+1;
707     totalN1 = m_gNE[1]+1;
708     totalN2 = m_gNE[2]+1;
709     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
710     in.getFunctionSpace().getTypeCode() == ReducedElements) {
711     myN0 = m_NE[0];
712     myN1 = m_NE[1];
713     myN2 = m_NE[2];
714     totalN0 = m_gNE[0];
715     totalN1 = m_gNE[1];
716     totalN2 = m_gNE[2];
717     } else
718     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
719    
720     const int numComp = in.getDataPointSize();
721     const int dpp = in.getNumDataPointsPerSample();
722 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
723 caltinay 4334
724     if (numComp > 1 || dpp > 1)
725     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
726    
727     // from here on we know that each sample consists of one value
728 caltinay 4482 FileWriter fw;
729     fw.openFile(filename, fileSize);
730 caltinay 4334 MPIBarrier();
731    
732     for (index_t z=0; z<myN2; z++) {
733     for (index_t y=0; y<myN1; y++) {
734     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
735 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
736 caltinay 4334 ostringstream oss;
737    
738     for (index_t x=0; x<myN0; x++) {
739 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
740 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
741     if (byteOrder == BYTEORDER_NATIVE) {
742 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
743     } else {
744     char* value = reinterpret_cast<char*>(&fvalue);
745 caltinay 5084 if (sizeof(fvalue)>4) {
746     byte_swap64(value);
747     } else {
748     byte_swap32(value);
749     }
750     oss.write(value, sizeof(fvalue));
751 caltinay 4334 }
752     }
753 caltinay 4482 fw.writeAt(oss, fileofs);
754 caltinay 4334 }
755     }
756 caltinay 4482 fw.close();
757 caltinay 4334 }
758    
759 caltinay 3691 void Brick::dump(const string& fileName) const
760     {
761     #if USE_SILO
762     string fn(fileName);
763     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
764     fn+=".silo";
765     }
766    
767     int driver=DB_HDF5;
768     string siloPath;
769     DBfile* dbfile = NULL;
770    
771     #ifdef ESYS_MPI
772     PMPIO_baton_t* baton = NULL;
773 gross 3793 const int NUM_SILO_FILES = 1;
774     const char* blockDirFmt = "/block%04d";
775 caltinay 3691 #endif
776    
777     if (m_mpiInfo->size > 1) {
778     #ifdef ESYS_MPI
779     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
780     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
781     PMPIO_DefaultClose, (void*)&driver);
782     // try the fallback driver in case of error
783     if (!baton && driver != DB_PDB) {
784     driver = DB_PDB;
785     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
786     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
787     PMPIO_DefaultClose, (void*)&driver);
788     }
789     if (baton) {
790     char str[64];
791     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
792     siloPath = str;
793     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
794     }
795     #endif
796     } else {
797     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
798     getDescription().c_str(), driver);
799     // try the fallback driver in case of error
800     if (!dbfile && driver != DB_PDB) {
801     driver = DB_PDB;
802     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
803     getDescription().c_str(), driver);
804     }
805     }
806    
807     if (!dbfile)
808     throw RipleyException("dump: Could not create Silo file");
809    
810     /*
811     if (driver==DB_HDF5) {
812     // gzip level 1 already provides good compression with minimal
813     // performance penalty. Some tests showed that gzip levels >3 performed
814     // rather badly on escript data both in terms of time and space
815     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
816     }
817     */
818    
819 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
820     boost::scoped_ptr<double> y(new double[m_NN[1]]);
821     boost::scoped_ptr<double> z(new double[m_NN[2]]);
822 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
823 caltinay 5084 const int NN0 = m_NN[0];
824     const int NN1 = m_NN[1];
825     const int NN2 = m_NN[2];
826    
827 caltinay 3691 #pragma omp parallel
828     {
829     #pragma omp for
830 caltinay 5084 for (dim_t i0 = 0; i0 < NN0; i0++) {
831 caltinay 4334 coords[0][i0]=getLocalCoordinate(i0, 0);
832 caltinay 3691 }
833     #pragma omp for
834 caltinay 5084 for (dim_t i1 = 0; i1 < NN1; i1++) {
835 caltinay 4334 coords[1][i1]=getLocalCoordinate(i1, 1);
836 caltinay 3691 }
837     #pragma omp for
838 caltinay 5084 for (dim_t i2 = 0; i2 < NN2; i2++) {
839 caltinay 4334 coords[2][i2]=getLocalCoordinate(i2, 2);
840 caltinay 3691 }
841     }
842 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
843    
844     // write mesh
845     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
846 caltinay 3691 DB_COLLINEAR, NULL);
847    
848 caltinay 4334 // write node ids
849     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
850 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
851 caltinay 3691
852 caltinay 3698 // write element ids
853 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
854 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
855 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
856 caltinay 3698
857     // rank 0 writes multimesh and multivar
858 caltinay 3691 if (m_mpiInfo->rank == 0) {
859     vector<string> tempstrings;
860     vector<char*> names;
861     for (dim_t i=0; i<m_mpiInfo->size; i++) {
862     stringstream path;
863     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
864     tempstrings.push_back(path.str());
865     names.push_back((char*)tempstrings.back().c_str());
866     }
867     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
868     DBSetDir(dbfile, "/");
869     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
870     &types[0], NULL);
871     tempstrings.clear();
872     names.clear();
873     for (dim_t i=0; i<m_mpiInfo->size; i++) {
874     stringstream path;
875     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
876     tempstrings.push_back(path.str());
877     names.push_back((char*)tempstrings.back().c_str());
878     }
879     types.assign(m_mpiInfo->size, DB_QUADVAR);
880     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
881     &types[0], NULL);
882 caltinay 3698 tempstrings.clear();
883     names.clear();
884     for (dim_t i=0; i<m_mpiInfo->size; i++) {
885     stringstream path;
886     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
887     tempstrings.push_back(path.str());
888     names.push_back((char*)tempstrings.back().c_str());
889     }
890     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
891     &types[0], NULL);
892 caltinay 3691 }
893    
894     if (m_mpiInfo->size > 1) {
895     #ifdef ESYS_MPI
896     PMPIO_HandOffBaton(baton, dbfile);
897     PMPIO_Finish(baton);
898     #endif
899     } else {
900     DBClose(dbfile);
901     }
902    
903     #else // USE_SILO
904 caltinay 3791 throw RipleyException("dump: no Silo support");
905 caltinay 3691 #endif
906     }
907    
908     const int* Brick::borrowSampleReferenceIDs(int fsType) const
909     {
910 caltinay 3697 switch (fsType) {
911     case Nodes:
912 caltinay 3748 case ReducedNodes: //FIXME: reduced
913 caltinay 3753 return &m_nodeId[0];
914 caltinay 3757 case DegreesOfFreedom:
915     case ReducedDegreesOfFreedom: //FIXME: reduced
916 caltinay 3753 return &m_dofId[0];
917 caltinay 3697 case Elements:
918 caltinay 3733 case ReducedElements:
919 caltinay 3697 return &m_elementId[0];
920 caltinay 3757 case FaceElements:
921 caltinay 3733 case ReducedFaceElements:
922 caltinay 3697 return &m_faceId[0];
923 sshaw 4660 case Points:
924     return &m_diracPointNodeIDs[0];
925 caltinay 3697 default:
926     break;
927     }
928 caltinay 3691
929 caltinay 3697 stringstream msg;
930 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
931 caltinay 3697 throw RipleyException(msg.str());
932 caltinay 3691 }
933    
934 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
935 caltinay 3691 {
936 caltinay 3759 if (getMPISize()==1)
937     return true;
938    
939 caltinay 3757 switch (fsType) {
940     case Nodes:
941     case ReducedNodes: //FIXME: reduced
942     return (m_dofMap[id] < getNumDOF());
943     case DegreesOfFreedom:
944     case ReducedDegreesOfFreedom:
945     return true;
946     case Elements:
947     case ReducedElements:
948     {
949     // check ownership of element's _last_ node
950 caltinay 4334 const index_t x=id%m_NE[0] + 1;
951     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
952     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
953     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
954 caltinay 3757 }
955     case FaceElements:
956     case ReducedFaceElements:
957 caltinay 3759 {
958     // check ownership of face element's last node
959     dim_t n=0;
960 caltinay 4334 for (size_t i=0; i<6; i++) {
961     n+=m_faceCount[i];
962 caltinay 3759 if (id<n) {
963 caltinay 4334 const index_t j=id-n+m_faceCount[i];
964 caltinay 3759 if (i>=4) { // front or back
965 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
966     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
967 caltinay 3759 } else if (i>=2) { // bottom or top
968 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
969     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
970 caltinay 3759 } else { // left or right
971 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
972     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
973 caltinay 3759 }
974     }
975     }
976     return false;
977     }
978 caltinay 3757 default:
979     break;
980 caltinay 3753 }
981 caltinay 3757
982     stringstream msg;
983 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
984 caltinay 3757 throw RipleyException(msg.str());
985 caltinay 3691 }
986    
987 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
988 caltinay 3703 {
989 caltinay 5084 const int NE0 = m_NE[0];
990     const int NE1 = m_NE[1];
991     const int NE2 = m_NE[2];
992    
993 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
994     out.requireWrite();
995     #pragma omp parallel
996     {
997     if (m_faceOffset[0] > -1) {
998     #pragma omp for nowait
999 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1000     for (index_t k1 = 0; k1 < NE1; ++k1) {
1001 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1002 caltinay 3764 // set vector at four quadrature points
1003     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1004     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1005     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1006     *o++ = -1.; *o++ = 0.; *o = 0.;
1007     }
1008     }
1009     }
1010    
1011     if (m_faceOffset[1] > -1) {
1012     #pragma omp for nowait
1013 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1014     for (index_t k1 = 0; k1 < NE1; ++k1) {
1015 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1016 caltinay 3764 // set vector at four quadrature points
1017     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1018     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1019     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1020     *o++ = 1.; *o++ = 0.; *o = 0.;
1021     }
1022     }
1023     }
1024    
1025     if (m_faceOffset[2] > -1) {
1026     #pragma omp for nowait
1027 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1028     for (index_t k0 = 0; k0 < NE0; ++k0) {
1029 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1030 caltinay 3764 // set vector at four quadrature points
1031     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1032     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1033     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1034     *o++ = 0.; *o++ = -1.; *o = 0.;
1035     }
1036     }
1037     }
1038    
1039     if (m_faceOffset[3] > -1) {
1040     #pragma omp for nowait
1041 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1042     for (index_t k0 = 0; k0 < NE0; ++k0) {
1043 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1044 caltinay 3764 // set vector at four quadrature points
1045     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1046     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1047     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1048     *o++ = 0.; *o++ = 1.; *o = 0.;
1049     }
1050     }
1051     }
1052    
1053     if (m_faceOffset[4] > -1) {
1054     #pragma omp for nowait
1055 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1056     for (index_t k0 = 0; k0 < NE0; ++k0) {
1057 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1058 caltinay 3764 // set vector at four quadrature points
1059     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1060     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1061     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1062     *o++ = 0.; *o++ = 0.; *o = -1.;
1063     }
1064     }
1065     }
1066    
1067     if (m_faceOffset[5] > -1) {
1068     #pragma omp for nowait
1069 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1070     for (index_t k0 = 0; k0 < NE0; ++k0) {
1071 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1072 caltinay 3764 // set vector at four quadrature points
1073     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1074     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1075     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1076     *o++ = 0.; *o++ = 0.; *o = 1.;
1077     }
1078     }
1079     }
1080     } // end of parallel section
1081     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1082     out.requireWrite();
1083     #pragma omp parallel
1084     {
1085     if (m_faceOffset[0] > -1) {
1086     #pragma omp for nowait
1087 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1088     for (index_t k1 = 0; k1 < NE1; ++k1) {
1089 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1090 caltinay 3764 *o++ = -1.;
1091     *o++ = 0.;
1092     *o = 0.;
1093     }
1094     }
1095     }
1096    
1097     if (m_faceOffset[1] > -1) {
1098     #pragma omp for nowait
1099 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1100     for (index_t k1 = 0; k1 < NE1; ++k1) {
1101 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1102 caltinay 3764 *o++ = 1.;
1103     *o++ = 0.;
1104     *o = 0.;
1105     }
1106     }
1107     }
1108    
1109     if (m_faceOffset[2] > -1) {
1110     #pragma omp for nowait
1111 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1112     for (index_t k0 = 0; k0 < NE0; ++k0) {
1113 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1114 caltinay 3764 *o++ = 0.;
1115     *o++ = -1.;
1116     *o = 0.;
1117     }
1118     }
1119     }
1120    
1121     if (m_faceOffset[3] > -1) {
1122     #pragma omp for nowait
1123 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1124     for (index_t k0 = 0; k0 < NE0; ++k0) {
1125 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1126 caltinay 3764 *o++ = 0.;
1127     *o++ = 1.;
1128     *o = 0.;
1129     }
1130     }
1131     }
1132    
1133     if (m_faceOffset[4] > -1) {
1134     #pragma omp for nowait
1135 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1136     for (index_t k0 = 0; k0 < NE0; ++k0) {
1137 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1138 caltinay 3764 *o++ = 0.;
1139     *o++ = 0.;
1140     *o = -1.;
1141     }
1142     }
1143     }
1144    
1145     if (m_faceOffset[5] > -1) {
1146     #pragma omp for nowait
1147 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1148     for (index_t k0 = 0; k0 < NE0; ++k0) {
1149 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1150 caltinay 3764 *o++ = 0.;
1151     *o++ = 0.;
1152     *o = 1.;
1153     }
1154     }
1155     }
1156     } // end of parallel section
1157    
1158     } else {
1159     stringstream msg;
1160 caltinay 3791 msg << "setToNormal: invalid function space type "
1161     << out.getFunctionSpace().getTypeCode();
1162 caltinay 3764 throw RipleyException(msg.str());
1163     }
1164     }
1165    
1166     void Brick::setToSize(escript::Data& out) const
1167     {
1168     if (out.getFunctionSpace().getTypeCode() == Elements
1169     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1170     out.requireWrite();
1171     const dim_t numQuad=out.getNumDataPointsPerSample();
1172 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1173 caltinay 5084 const int NE = getNumElements();
1174 caltinay 3764 #pragma omp parallel for
1175 caltinay 5084 for (index_t k = 0; k < NE; ++k) {
1176 caltinay 3764 double* o = out.getSampleDataRW(k);
1177     fill(o, o+numQuad, size);
1178     }
1179     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1180     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1181     out.requireWrite();
1182     const dim_t numQuad=out.getNumDataPointsPerSample();
1183 caltinay 5084 const int NE0 = m_NE[0];
1184     const int NE1 = m_NE[1];
1185     const int NE2 = m_NE[2];
1186 caltinay 3764 #pragma omp parallel
1187     {
1188     if (m_faceOffset[0] > -1) {
1189 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1190 caltinay 3764 #pragma omp for nowait
1191 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1192     for (index_t k1 = 0; k1 < NE1; ++k1) {
1193 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1194 caltinay 3764 fill(o, o+numQuad, size);
1195     }
1196     }
1197     }
1198    
1199     if (m_faceOffset[1] > -1) {
1200 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1201 caltinay 3764 #pragma omp for nowait
1202 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1203     for (index_t k1 = 0; k1 < NE1; ++k1) {
1204 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1205 caltinay 3764 fill(o, o+numQuad, size);
1206     }
1207     }
1208     }
1209    
1210     if (m_faceOffset[2] > -1) {
1211 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1212 caltinay 3764 #pragma omp for nowait
1213 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1214     for (index_t k0 = 0; k0 < NE0; ++k0) {
1215 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1216 caltinay 3764 fill(o, o+numQuad, size);
1217     }
1218     }
1219     }
1220    
1221     if (m_faceOffset[3] > -1) {
1222 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1223 caltinay 3764 #pragma omp for nowait
1224 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1225     for (index_t k0 = 0; k0 < NE0; ++k0) {
1226 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1227 caltinay 3764 fill(o, o+numQuad, size);
1228     }
1229     }
1230     }
1231    
1232     if (m_faceOffset[4] > -1) {
1233 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1234 caltinay 3764 #pragma omp for nowait
1235 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1236     for (index_t k0 = 0; k0 < NE0; ++k0) {
1237 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1238 caltinay 3764 fill(o, o+numQuad, size);
1239     }
1240     }
1241     }
1242    
1243     if (m_faceOffset[5] > -1) {
1244 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1245 caltinay 3764 #pragma omp for nowait
1246 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1247     for (index_t k0 = 0; k0 < NE0; ++k0) {
1248 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1249 caltinay 3764 fill(o, o+numQuad, size);
1250     }
1251     }
1252     }
1253     } // end of parallel section
1254    
1255     } else {
1256     stringstream msg;
1257 caltinay 3791 msg << "setToSize: invalid function space type "
1258     << out.getFunctionSpace().getTypeCode();
1259 caltinay 3764 throw RipleyException(msg.str());
1260     }
1261     }
1262    
1263     void Brick::Print_Mesh_Info(const bool full) const
1264     {
1265     RipleyDomain::Print_Mesh_Info(full);
1266     if (full) {
1267     cout << " Id Coordinates" << endl;
1268     cout.precision(15);
1269     cout.setf(ios::scientific, ios::floatfield);
1270     for (index_t i=0; i < getNumNodes(); i++) {
1271     cout << " " << setw(5) << m_nodeId[i]
1272 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1273     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1274     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1275 caltinay 3764 }
1276     }
1277     }
1278    
1279    
1280     //protected
1281     void Brick::assembleCoordinates(escript::Data& arg) const
1282     {
1283     escriptDataC x = arg.getDataC();
1284     int numDim = m_numDim;
1285     if (!isDataPointShapeEqual(&x, 1, &numDim))
1286     throw RipleyException("setToX: Invalid Data object shape");
1287     if (!numSamplesEqual(&x, 1, getNumNodes()))
1288     throw RipleyException("setToX: Illegal number of samples in Data object");
1289    
1290 caltinay 5084 const int NN0 = m_NN[0];
1291     const int NN1 = m_NN[1];
1292     const int NN2 = m_NN[2];
1293 caltinay 3764 arg.requireWrite();
1294     #pragma omp parallel for
1295 caltinay 5084 for (dim_t i2 = 0; i2 < NN2; i2++) {
1296     for (dim_t i1 = 0; i1 < NN1; i1++) {
1297     for (dim_t i0 = 0; i0 < NN0; i0++) {
1298     double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1299 caltinay 4334 point[0] = getLocalCoordinate(i0, 0);
1300     point[1] = getLocalCoordinate(i1, 1);
1301     point[2] = getLocalCoordinate(i2, 2);
1302 caltinay 3764 }
1303     }
1304     }
1305     }
1306    
1307     //protected
1308 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1309 caltinay 3764 {
1310 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1311 caltinay 3731 const double C0 = .044658198738520451079;
1312     const double C1 = .16666666666666666667;
1313     const double C2 = .21132486540518711775;
1314     const double C3 = .25;
1315     const double C4 = .5;
1316     const double C5 = .62200846792814621559;
1317     const double C6 = .78867513459481288225;
1318 caltinay 5084 const int NE0 = m_NE[0];
1319     const int NE1 = m_NE[1];
1320     const int NE2 = m_NE[2];
1321 caltinay 3731
1322 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1323 caltinay 3760 out.requireWrite();
1324 caltinay 3913 #pragma omp parallel
1325     {
1326     vector<double> f_000(numComp);
1327     vector<double> f_001(numComp);
1328     vector<double> f_010(numComp);
1329     vector<double> f_011(numComp);
1330     vector<double> f_100(numComp);
1331     vector<double> f_101(numComp);
1332     vector<double> f_110(numComp);
1333     vector<double> f_111(numComp);
1334     #pragma omp for
1335 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1336     for (index_t k1=0; k1 < NE1; ++k1) {
1337     for (index_t k0=0; k0 < NE0; ++k0) {
1338 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1339     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1340     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1341     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1342     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1343     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1344     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1345     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1346 caltinay 5084 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1347 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1348 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1349     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1350     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1351     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1352     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1353     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1354     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1355     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1356     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1357     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1358     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1359     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1360 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1361     o[INDEX3(i,1,0,numComp,3)] = V4;
1362     o[INDEX3(i,2,0,numComp,3)] = V8;
1363     o[INDEX3(i,0,1,numComp,3)] = V0;
1364     o[INDEX3(i,1,1,numComp,3)] = V5;
1365     o[INDEX3(i,2,1,numComp,3)] = V9;
1366     o[INDEX3(i,0,2,numComp,3)] = V1;
1367     o[INDEX3(i,1,2,numComp,3)] = V4;
1368     o[INDEX3(i,2,2,numComp,3)] = V10;
1369     o[INDEX3(i,0,3,numComp,3)] = V1;
1370     o[INDEX3(i,1,3,numComp,3)] = V5;
1371     o[INDEX3(i,2,3,numComp,3)] = V11;
1372     o[INDEX3(i,0,4,numComp,3)] = V2;
1373     o[INDEX3(i,1,4,numComp,3)] = V6;
1374     o[INDEX3(i,2,4,numComp,3)] = V8;
1375     o[INDEX3(i,0,5,numComp,3)] = V2;
1376     o[INDEX3(i,1,5,numComp,3)] = V7;
1377     o[INDEX3(i,2,5,numComp,3)] = V9;
1378     o[INDEX3(i,0,6,numComp,3)] = V3;
1379     o[INDEX3(i,1,6,numComp,3)] = V6;
1380     o[INDEX3(i,2,6,numComp,3)] = V10;
1381     o[INDEX3(i,0,7,numComp,3)] = V3;
1382     o[INDEX3(i,1,7,numComp,3)] = V7;
1383     o[INDEX3(i,2,7,numComp,3)] = V11;
1384     } // end of component loop i
1385     } // end of k0 loop
1386     } // end of k1 loop
1387     } // end of k2 loop
1388     } // end of parallel section
1389 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1390 caltinay 3760 out.requireWrite();
1391 caltinay 3913 #pragma omp parallel
1392     {
1393     vector<double> f_000(numComp);
1394     vector<double> f_001(numComp);
1395     vector<double> f_010(numComp);
1396     vector<double> f_011(numComp);
1397     vector<double> f_100(numComp);
1398     vector<double> f_101(numComp);
1399     vector<double> f_110(numComp);
1400     vector<double> f_111(numComp);
1401     #pragma omp for
1402 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1403     for (index_t k1=0; k1 < NE1; ++k1) {
1404     for (index_t k0=0; k0 < NE0; ++k0) {
1405 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1406     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1407     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1408     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1409     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1410     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1411     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1412     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1413 caltinay 5084 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1414 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1415 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1416     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1417     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1418 caltinay 3913 } // end of component loop i
1419     } // end of k0 loop
1420     } // end of k1 loop
1421     } // end of k2 loop
1422     } // end of parallel section
1423 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1424 caltinay 3760 out.requireWrite();
1425 caltinay 3722 #pragma omp parallel
1426     {
1427 caltinay 3913 vector<double> f_000(numComp);
1428     vector<double> f_001(numComp);
1429     vector<double> f_010(numComp);
1430     vector<double> f_011(numComp);
1431     vector<double> f_100(numComp);
1432     vector<double> f_101(numComp);
1433     vector<double> f_110(numComp);
1434     vector<double> f_111(numComp);
1435 caltinay 3722 if (m_faceOffset[0] > -1) {
1436     #pragma omp for nowait
1437 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1438     for (index_t k1=0; k1 < NE1; ++k1) {
1439 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1445     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1446     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1447 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1448 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1449 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1450     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1451     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1452     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1453     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1454 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1455     o[INDEX3(i,2,0,numComp,3)] = V2;
1456 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1457 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1458     o[INDEX3(i,2,1,numComp,3)] = V3;
1459 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1460 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1461     o[INDEX3(i,2,2,numComp,3)] = V2;
1462 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1463 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1464     o[INDEX3(i,2,3,numComp,3)] = V3;
1465 caltinay 3764 } // end of component loop i
1466     } // end of k1 loop
1467     } // end of k2 loop
1468     } // end of face 0
1469 caltinay 3722 if (m_faceOffset[1] > -1) {
1470     #pragma omp for nowait
1471 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1472     for (index_t k1=0; k1 < NE1; ++k1) {
1473 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1479     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1480     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1481 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1482 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1483 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1484     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1485     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1486     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1487     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1488 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1489     o[INDEX3(i,2,0,numComp,3)] = V2;
1490 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1491 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1492     o[INDEX3(i,2,1,numComp,3)] = V3;
1493 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1494 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1495     o[INDEX3(i,2,2,numComp,3)] = V2;
1496 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1497 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1498     o[INDEX3(i,2,3,numComp,3)] = V3;
1499 caltinay 3764 } // end of component loop i
1500     } // end of k1 loop
1501     } // end of k2 loop
1502     } // end of face 1
1503 caltinay 3722 if (m_faceOffset[2] > -1) {
1504     #pragma omp for nowait
1505 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1506     for (index_t k0=0; k0 < NE0; ++k0) {
1507 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1516 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1517 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1518     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1519     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1520 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1521 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1522 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1523     o[INDEX3(i,0,1,numComp,3)] = V0;
1524 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1525 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1526     o[INDEX3(i,0,2,numComp,3)] = V0;
1527 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1528 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1529     o[INDEX3(i,0,3,numComp,3)] = V0;
1530 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1531 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1532 caltinay 3764 } // end of component loop i
1533     } // end of k0 loop
1534     } // end of k2 loop
1535     } // end of face 2
1536 caltinay 3722 if (m_faceOffset[3] > -1) {
1537     #pragma omp for nowait
1538 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1539     for (index_t k0=0; k0 < NE0; ++k0) {
1540 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1549 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1550 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1551     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1552     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1553     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1554 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1555 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1556 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1557     o[INDEX3(i,0,1,numComp,3)] = V0;
1558 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1559 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1560     o[INDEX3(i,0,2,numComp,3)] = V1;
1561 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1562 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1563     o[INDEX3(i,0,3,numComp,3)] = V1;
1564 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1565 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1566 caltinay 3764 } // end of component loop i
1567     } // end of k0 loop
1568     } // end of k2 loop
1569     } // end of face 3
1570 caltinay 3722 if (m_faceOffset[4] > -1) {
1571     #pragma omp for nowait
1572 caltinay 5084 for (index_t k1=0; k1 < NE1; ++k1) {
1573     for (index_t k0=0; k0 < NE0; ++k0) {
1574 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1583 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1584 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1585     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1586     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1587     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1588 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1589     o[INDEX3(i,1,0,numComp,3)] = V2;
1590 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1591 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1592     o[INDEX3(i,1,1,numComp,3)] = V3;
1593 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1594 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1595     o[INDEX3(i,1,2,numComp,3)] = V2;
1596 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1597 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1598     o[INDEX3(i,1,3,numComp,3)] = V3;
1599 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1600 caltinay 3764 } // end of component loop i
1601     } // end of k0 loop
1602     } // end of k1 loop
1603     } // end of face 4
1604 caltinay 3722 if (m_faceOffset[5] > -1) {
1605     #pragma omp for nowait
1606 caltinay 5084 for (index_t k1=0; k1 < NE1; ++k1) {
1607     for (index_t k0=0; k0 < NE0; ++k0) {
1608 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1609     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1610     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1614     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1615     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1616 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,NE0));
1617 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1618 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];