/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4940 - (hide annotations)
Thu May 15 01:40:06 2014 UTC (4 years, 10 months ago) by caltinay
File size: 166324 byte(s)
A branch to have fun with diagonal storage for ripley.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17 sshaw 4861 #include <limits>
18    
19 caltinay 3691 #include <ripley/Brick.h>
20 caltinay 3791 #include <paso/SystemMatrix.h>
21 caltinay 4334 #include <esysUtils/esysFileWriter.h>
22 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
23 sshaw 4629 #include <ripley/WaveAssembler3D.h>
24 sshaw 4712 #include <ripley/LameAssembler3D.h>
25     #include <ripley/domainhelpers.h>
26 caltinay 4477 #include <boost/scoped_array.hpp>
27    
28 caltinay 4013 #ifdef USE_NETCDF
29     #include <netcdfcpp.h>
30     #endif
31    
32 caltinay 3691 #if USE_SILO
33     #include <silo.h>
34     #ifdef ESYS_MPI
35     #include <pmpio.h>
36     #endif
37     #endif
38    
39     #include <iomanip>
40    
41 jfenwick 4650 #include "esysUtils/EsysRandom.h"
42     #include "blocktools.h"
43    
44    
45 caltinay 3691 using namespace std;
46 caltinay 4334 using esysUtils::FileWriter;
47 caltinay 3691
48     namespace ripley {
49    
50 sshaw 4712 int indexOfMax(int a, int b, int c) {
51     if (a > b) {
52     if (c > a) {
53     return 2;
54     }
55     return 0;
56     } else if (b > c) {
57     return 1;
58     }
59     return 2;
60     }
61    
62 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
63 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
64     const std::vector<double>& points, const std::vector<int>& tags,
65 jfenwick 4934 const simap_t& tagnamestonums,
66     escript::SubWorld_ptr w) :
67     RipleyDomain(3, w)
68 caltinay 3691 {
69 sshaw 4861 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
70     * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
71     throw RipleyException("The number of elements has overflowed, this "
72     "limit may be raised in future releases.");
73    
74 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
75     throw RipleyException("Number of elements in each spatial dimension "
76     "must be positive");
77    
78 caltinay 3943 // ignore subdivision parameters for serial run
79     if (m_mpiInfo->size == 1) {
80     d0=1;
81     d1=1;
82     d2=1;
83     }
84     bool warn=false;
85 sshaw 4712
86     std::vector<int> factors;
87     int ranks = m_mpiInfo->size;
88     int epr[3] = {n0,n1,n2};
89     int d[3] = {d0,d1,d2};
90     if (d0<=0 || d1<=0 || d2<=0) {
91     for (int i = 0; i < 3; i++) {
92     if (d[i] < 1) {
93     d[i] = 1;
94     continue;
95 caltinay 3943 }
96 sshaw 4712 epr[i] = -1; // can no longer be max
97     if (ranks % d[i] != 0) {
98     throw RipleyException("Invalid number of spatial subdivisions");
99 caltinay 3943 }
100 sshaw 4712 //remove
101     ranks /= d[i];
102 caltinay 3943 }
103 sshaw 4712 factorise(factors, ranks);
104     if (factors.size() != 0) {
105     warn = true;
106 caltinay 3943 }
107 sshaw 4712 }
108     while (factors.size() > 0) {
109     int i = indexOfMax(epr[0],epr[1],epr[2]);
110     int f = factors.back();
111     factors.pop_back();
112     d[i] *= f;
113     epr[i] /= f;
114 caltinay 3943 }
115 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
116 caltinay 3943
117 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
118     // among number of ranks
119 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
120 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
121 sshaw 4712 }
122 caltinay 3943 if (warn) {
123     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
124     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
125     }
126 caltinay 3691
127 caltinay 4334 double l0 = x1-x0;
128     double l1 = y1-y0;
129     double l2 = z1-z0;
130     m_dx[0] = l0/n0;
131     m_dx[1] = l1/n1;
132     m_dx[2] = l2/n2;
133    
134     if ((n0+1)%d0 > 0) {
135 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
136 caltinay 4334 l0=m_dx[0]*n0;
137 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
138 caltinay 4334 << n0 << ", l0=" << l0 << endl;
139 caltinay 3943 }
140 caltinay 4334 if ((n1+1)%d1 > 0) {
141 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
142 caltinay 4334 l1=m_dx[1]*n1;
143 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
144 caltinay 4334 << n1 << ", l1=" << l1 << endl;
145 caltinay 3943 }
146 caltinay 4334 if ((n2+1)%d2 > 0) {
147 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
148 caltinay 4334 l2=m_dx[2]*n2;
149 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
150 caltinay 4334 << n2 << ", l2=" << l2 << endl;
151 caltinay 3943 }
152    
153 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
154 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
155    
156 caltinay 4334 m_gNE[0] = n0;
157     m_gNE[1] = n1;
158     m_gNE[2] = n2;
159     m_origin[0] = x0;
160     m_origin[1] = y0;
161     m_origin[2] = z0;
162     m_length[0] = l0;
163     m_length[1] = l1;
164     m_length[2] = l2;
165     m_NX[0] = d0;
166     m_NX[1] = d1;
167     m_NX[2] = d2;
168    
169 caltinay 3753 // local number of elements (including overlap)
170 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
171     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
172     m_NE[0]++;
173     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
174     m_ownNE[0]--;
175 caltinay 3764
176 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
177     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
178     m_NE[1]++;
179     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
180     m_ownNE[1]--;
181 caltinay 3764
182 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
183     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
184     m_NE[2]++;
185     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
186     m_ownNE[2]--;
187 caltinay 3753
188     // local number of nodes
189 caltinay 4334 m_NN[0] = m_NE[0]+1;
190     m_NN[1] = m_NE[1]+1;
191     m_NN[2] = m_NE[2]+1;
192 caltinay 3753
193 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
194 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
195     if (m_offset[0] > 0)
196     m_offset[0]--;
197     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
198     if (m_offset[1] > 0)
199     m_offset[1]--;
200     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
201     if (m_offset[2] > 0)
202     m_offset[2]--;
203 caltinay 3753
204 caltinay 3691 populateSampleIds();
205 caltinay 3756 createPattern();
206 sshaw 4622
207     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
208 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
209     i != tagnamestonums.end(); i++) {
210     setTagMap(i->first, i->second);
211     }
212 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
213 caltinay 3691 }
214    
215    
216     Brick::~Brick()
217     {
218 sshaw 4622 delete assembler;
219 caltinay 3691 }
220    
221     string Brick::getDescription() const
222     {
223     return "ripley::Brick";
224     }
225    
226     bool Brick::operator==(const AbstractDomain& other) const
227     {
228 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
229     if (o) {
230     return (RipleyDomain::operator==(other) &&
231 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
232     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
233     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
234     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
235 caltinay 3744 }
236 caltinay 3691
237     return false;
238     }
239    
240 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
241 caltinay 4618 const ReaderParameters& params) const
242 caltinay 4013 {
243     #ifdef USE_NETCDF
244     // check destination function space
245     int myN0, myN1, myN2;
246     if (out.getFunctionSpace().getTypeCode() == Nodes) {
247 caltinay 4334 myN0 = m_NN[0];
248     myN1 = m_NN[1];
249     myN2 = m_NN[2];
250 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
251     out.getFunctionSpace().getTypeCode() == ReducedElements) {
252 caltinay 4334 myN0 = m_NE[0];
253     myN1 = m_NE[1];
254     myN2 = m_NE[2];
255 caltinay 4013 } else
256     throw RipleyException("readNcGrid(): invalid function space for output data object");
257    
258 caltinay 4615 if (params.first.size() != 3)
259 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
260    
261 caltinay 4615 if (params.numValues.size() != 3)
262 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
263    
264 caltinay 4615 if (params.multiplier.size() != 3)
265 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
266 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
267     if (params.multiplier[i]<1)
268 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
269    
270 caltinay 4013 // check file existence and size
271     NcFile f(filename.c_str(), NcFile::ReadOnly);
272     if (!f.is_valid())
273     throw RipleyException("readNcGrid(): cannot open file");
274    
275     NcVar* var = f.get_var(varname.c_str());
276     if (!var)
277     throw RipleyException("readNcGrid(): invalid variable name");
278    
279     // TODO: rank>0 data support
280     const int numComp = out.getDataPointSize();
281     if (numComp > 1)
282     throw RipleyException("readNcGrid(): only scalar data supported");
283    
284     const int dims = var->num_dims();
285 caltinay 4477 boost::scoped_array<long> edges(var->edges());
286 caltinay 4013
287     // is this a slice of the data object (dims!=3)?
288     // note the expected ordering of edges (as in numpy: z,y,x)
289 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
290     params.numValues[1] > edges[1] ||
291     params.numValues[0] > edges[2]))
292     || (dims==2 && params.numValues[2]>1)
293     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
294 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
295     }
296    
297     // check if this rank contributes anything
298 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
299     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
300     params.first[1] >= m_offset[1]+myN1 ||
301     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
302     params.first[2] >= m_offset[2]+myN2 ||
303     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
304 caltinay 4013 return;
305     }
306    
307     // now determine how much this rank has to write
308    
309     // first coordinates in data object to write to
310 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
311     const int first1 = max(0, params.first[1]-m_offset[1]);
312     const int first2 = max(0, params.first[2]-m_offset[2]);
313 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
314     int idx0 = max(0, m_offset[0]-params.first[0]);
315     int idx1 = max(0, m_offset[1]-params.first[1]);
316     int idx2 = max(0, m_offset[2]-params.first[2]);
317 caltinay 4277 // number of values to read
318 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
319     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
320     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
321 caltinay 4013
322 caltinay 4618 // make sure we read the right block if going backwards through file
323     if (params.reverse[0])
324     idx0 = edges[dims-1]-num0-idx0;
325     if (dims>1 && params.reverse[1])
326     idx1 = edges[dims-2]-num1-idx1;
327     if (dims>2 && params.reverse[2])
328     idx2 = edges[dims-3]-num2-idx2;
329    
330    
331 caltinay 4013 vector<double> values(num0*num1*num2);
332     if (dims==3) {
333     var->set_cur(idx2, idx1, idx0);
334     var->get(&values[0], num2, num1, num0);
335     } else if (dims==2) {
336     var->set_cur(idx1, idx0);
337     var->get(&values[0], num1, num0);
338     } else {
339     var->set_cur(idx0);
340     var->get(&values[0], num0);
341     }
342    
343     const int dpp = out.getNumDataPointsPerSample();
344     out.requireWrite();
345    
346 caltinay 4618 // helpers for reversing
347     const int x0 = (params.reverse[0] ? num0-1 : 0);
348     const int x_mult = (params.reverse[0] ? -1 : 1);
349     const int y0 = (params.reverse[1] ? num1-1 : 0);
350     const int y_mult = (params.reverse[1] ? -1 : 1);
351     const int z0 = (params.reverse[2] ? num2-1 : 0);
352     const int z_mult = (params.reverse[2] ? -1 : 1);
353    
354 caltinay 4013 for (index_t z=0; z<num2; z++) {
355     for (index_t y=0; y<num1; y++) {
356     #pragma omp parallel for
357     for (index_t x=0; x<num0; x++) {
358 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
359     +(first1+y*params.multiplier[1])*myN0
360     +(first2+z*params.multiplier[2])*myN0*myN1;
361 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
362     +(y0+y_mult*y)*num0
363     +(x0+x_mult*x);
364 caltinay 4174 if (!isnan(values[srcIndex])) {
365 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
366     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
367     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
368 caltinay 4277 const int dataIndex = baseIndex+m0
369     +m1*myN0
370     +m2*myN0*myN1;
371     double* dest = out.getSampleDataRW(dataIndex);
372     for (index_t q=0; q<dpp; q++) {
373     *dest++ = values[srcIndex];
374     }
375     }
376     }
377 caltinay 4174 }
378 caltinay 4013 }
379     }
380     }
381     }
382     #else
383     throw RipleyException("readNcGrid(): not compiled with netCDF support");
384     #endif
385     }
386    
387 sshaw 4738 #ifdef USE_BOOSTIO
388     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
389     const ReaderParameters& params) const
390     {
391     // the mapping is not universally correct but should work on our
392     // supported platforms
393     switch (params.dataType) {
394     case DATATYPE_INT32:
395     readBinaryGridZippedImpl<int>(out, filename, params);
396     break;
397     case DATATYPE_FLOAT32:
398     readBinaryGridZippedImpl<float>(out, filename, params);
399     break;
400     case DATATYPE_FLOAT64:
401     readBinaryGridZippedImpl<double>(out, filename, params);
402     break;
403     default:
404     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
405     }
406     }
407     #endif
408    
409 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
410 caltinay 4618 const ReaderParameters& params) const
411 caltinay 4334 {
412 caltinay 4495 // the mapping is not universally correct but should work on our
413     // supported platforms
414 caltinay 4615 switch (params.dataType) {
415 caltinay 4495 case DATATYPE_INT32:
416 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
417 caltinay 4495 break;
418     case DATATYPE_FLOAT32:
419 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
420 caltinay 4495 break;
421     case DATATYPE_FLOAT64:
422 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
423 caltinay 4495 break;
424     default:
425     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
426     }
427     }
428    
429     template<typename ValueType>
430     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
431 caltinay 4618 const ReaderParameters& params) const
432 caltinay 4495 {
433 caltinay 4334 // check destination function space
434     int myN0, myN1, myN2;
435     if (out.getFunctionSpace().getTypeCode() == Nodes) {
436     myN0 = m_NN[0];
437     myN1 = m_NN[1];
438     myN2 = m_NN[2];
439     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
440     out.getFunctionSpace().getTypeCode() == ReducedElements) {
441     myN0 = m_NE[0];
442     myN1 = m_NE[1];
443     myN2 = m_NE[2];
444     } else
445     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
446    
447 caltinay 4615 if (params.first.size() != 3)
448 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
449    
450 caltinay 4615 if (params.numValues.size() != 3)
451 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
452    
453 caltinay 4615 if (params.multiplier.size() != 3)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
455 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
456     if (params.multiplier[i]<1)
457 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
458    
459     // check file existence and size
460     ifstream f(filename.c_str(), ifstream::binary);
461     if (f.fail()) {
462     throw RipleyException("readBinaryGrid(): cannot open file");
463     }
464     f.seekg(0, ios::end);
465     const int numComp = out.getDataPointSize();
466     const int filesize = f.tellg();
467 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
468 caltinay 4334 if (filesize < reqsize) {
469     f.close();
470     throw RipleyException("readBinaryGrid(): not enough data in file");
471     }
472    
473     // check if this rank contributes anything
474 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
475     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
476     params.first[1] >= m_offset[1]+myN1 ||
477     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
478     params.first[2] >= m_offset[2]+myN2 ||
479     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
480 caltinay 4334 f.close();
481     return;
482     }
483    
484     // now determine how much this rank has to write
485    
486     // first coordinates in data object to write to
487 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
488     const int first1 = max(0, params.first[1]-m_offset[1]);
489     const int first2 = max(0, params.first[2]-m_offset[2]);
490 caltinay 4334 // indices to first value in file
491 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
492     const int idx1 = max(0, m_offset[1]-params.first[1]);
493     const int idx2 = max(0, m_offset[2]-params.first[2]);
494 caltinay 4334 // number of values to read
495 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
496     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
497     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
498 caltinay 4334
499     out.requireWrite();
500 caltinay 4495 vector<ValueType> values(num0*numComp);
501 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
502    
503 caltinay 4529 for (int z=0; z<num2; z++) {
504     for (int y=0; y<num1; y++) {
505 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
506     +(idx2+z)*params.numValues[0]*params.numValues[1]);
507 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
508     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
509 caltinay 4334
510 caltinay 4529 for (int x=0; x<num0; x++) {
511 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
512     +(first1+y*params.multiplier[1])*myN0
513     +(first2+z*params.multiplier[2])*myN0*myN1;
514     for (int m2=0; m2<params.multiplier[2]; m2++) {
515     for (int m1=0; m1<params.multiplier[1]; m1++) {
516     for (int m0=0; m0<params.multiplier[0]; m0++) {
517 caltinay 4334 const int dataIndex = baseIndex+m0
518     +m1*myN0
519     +m2*myN0*myN1;
520     double* dest = out.getSampleDataRW(dataIndex);
521 caltinay 4529 for (int c=0; c<numComp; c++) {
522     ValueType val = values[x*numComp+c];
523    
524 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
525 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
526     // this will alter val!!
527     byte_swap32(cval);
528     }
529     if (!std::isnan(val)) {
530     for (int q=0; q<dpp; q++) {
531     *dest++ = static_cast<double>(val);
532 caltinay 4334 }
533     }
534     }
535     }
536     }
537     }
538     }
539     }
540     }
541    
542     f.close();
543     }
544    
545 sshaw 4738 #ifdef USE_BOOSTIO
546     template<typename ValueType>
547     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
548     const ReaderParameters& params) const
549     {
550     // check destination function space
551     int myN0, myN1, myN2;
552     if (out.getFunctionSpace().getTypeCode() == Nodes) {
553     myN0 = m_NN[0];
554     myN1 = m_NN[1];
555     myN2 = m_NN[2];
556     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
557     out.getFunctionSpace().getTypeCode() == ReducedElements) {
558     myN0 = m_NE[0];
559     myN1 = m_NE[1];
560     myN2 = m_NE[2];
561     } else
562     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
563    
564     if (params.first.size() != 3)
565     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
566    
567     if (params.numValues.size() != 3)
568     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
569    
570     if (params.multiplier.size() != 3)
571     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
572     for (size_t i=0; i<params.multiplier.size(); i++)
573     if (params.multiplier[i]<1)
574     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
575    
576     // check file existence and size
577     ifstream f(filename.c_str(), ifstream::binary);
578     if (f.fail()) {
579     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
580     }
581     f.seekg(0, ios::end);
582     const int numComp = out.getDataPointSize();
583     int filesize = f.tellg();
584     f.seekg(0, ios::beg);
585     std::vector<char> compressed(filesize);
586     f.read((char*)&compressed[0], filesize);
587     f.close();
588     std::vector<char> decompressed = unzip(compressed);
589     filesize = decompressed.size();
590     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
591     if (filesize < reqsize) {
592     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
593     }
594    
595     // check if this rank contributes anything
596     if (params.first[0] >= m_offset[0]+myN0 ||
597     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
598     params.first[1] >= m_offset[1]+myN1 ||
599     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
600     params.first[2] >= m_offset[2]+myN2 ||
601     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
602     return;
603     }
604    
605     // now determine how much this rank has to write
606    
607     // first coordinates in data object to write to
608     const int first0 = max(0, params.first[0]-m_offset[0]);
609     const int first1 = max(0, params.first[1]-m_offset[1]);
610     const int first2 = max(0, params.first[2]-m_offset[2]);
611     // indices to first value in file
612     const int idx0 = max(0, m_offset[0]-params.first[0]);
613     const int idx1 = max(0, m_offset[1]-params.first[1]);
614     const int idx2 = max(0, m_offset[2]-params.first[2]);
615     // number of values to read
616     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
617     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
618     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
619    
620     out.requireWrite();
621     vector<ValueType> values(num0*numComp);
622     const int dpp = out.getNumDataPointsPerSample();
623    
624     for (int z=0; z<num2; z++) {
625     for (int y=0; y<num1; y++) {
626     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
627     +(idx2+z)*params.numValues[0]*params.numValues[1]);
628     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
629    
630     for (int x=0; x<num0; x++) {
631     const int baseIndex = first0+x*params.multiplier[0]
632     +(first1+y*params.multiplier[1])*myN0
633     +(first2+z*params.multiplier[2])*myN0*myN1;
634     for (int m2=0; m2<params.multiplier[2]; m2++) {
635     for (int m1=0; m1<params.multiplier[1]; m1++) {
636     for (int m0=0; m0<params.multiplier[0]; m0++) {
637     const int dataIndex = baseIndex+m0
638     +m1*myN0
639     +m2*myN0*myN1;
640     double* dest = out.getSampleDataRW(dataIndex);
641     for (int c=0; c<numComp; c++) {
642     ValueType val = values[x*numComp+c];
643    
644     if (params.byteOrder != BYTEORDER_NATIVE) {
645     char* cval = reinterpret_cast<char*>(&val);
646     // this will alter val!!
647     byte_swap32(cval);
648     }
649     if (!std::isnan(val)) {
650     for (int q=0; q<dpp; q++) {
651     *dest++ = static_cast<double>(val);
652     }
653     }
654     }
655     }
656     }
657     }
658     }
659     }
660     }
661     }
662     #endif
663    
664 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
665     int byteOrder, int dataType) const
666 caltinay 4334 {
667 caltinay 4357 // the mapping is not universally correct but should work on our
668     // supported platforms
669     switch (dataType) {
670     case DATATYPE_INT32:
671     writeBinaryGridImpl<int>(in, filename, byteOrder);
672     break;
673     case DATATYPE_FLOAT32:
674     writeBinaryGridImpl<float>(in, filename, byteOrder);
675     break;
676     case DATATYPE_FLOAT64:
677     writeBinaryGridImpl<double>(in, filename, byteOrder);
678     break;
679     default:
680     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
681     }
682     }
683    
684     template<typename ValueType>
685     void Brick::writeBinaryGridImpl(const escript::Data& in,
686     const string& filename, int byteOrder) const
687     {
688 caltinay 4334 // check function space and determine number of points
689     int myN0, myN1, myN2;
690     int totalN0, totalN1, totalN2;
691     if (in.getFunctionSpace().getTypeCode() == Nodes) {
692     myN0 = m_NN[0];
693     myN1 = m_NN[1];
694     myN2 = m_NN[2];
695     totalN0 = m_gNE[0]+1;
696     totalN1 = m_gNE[1]+1;
697     totalN2 = m_gNE[2]+1;
698     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
699     in.getFunctionSpace().getTypeCode() == ReducedElements) {
700     myN0 = m_NE[0];
701     myN1 = m_NE[1];
702     myN2 = m_NE[2];
703     totalN0 = m_gNE[0];
704     totalN1 = m_gNE[1];
705     totalN2 = m_gNE[2];
706     } else
707     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
708    
709     const int numComp = in.getDataPointSize();
710     const int dpp = in.getNumDataPointsPerSample();
711 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
712 caltinay 4334
713     if (numComp > 1 || dpp > 1)
714     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
715    
716     // from here on we know that each sample consists of one value
717 caltinay 4482 FileWriter fw;
718     fw.openFile(filename, fileSize);
719 caltinay 4334 MPIBarrier();
720    
721     for (index_t z=0; z<myN2; z++) {
722     for (index_t y=0; y<myN1; y++) {
723     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
724 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
725 caltinay 4334 ostringstream oss;
726    
727     for (index_t x=0; x<myN0; x++) {
728 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
729 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
730     if (byteOrder == BYTEORDER_NATIVE) {
731 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
732     } else {
733     char* value = reinterpret_cast<char*>(&fvalue);
734 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
735 caltinay 4334 }
736     }
737 caltinay 4482 fw.writeAt(oss, fileofs);
738 caltinay 4334 }
739     }
740 caltinay 4482 fw.close();
741 caltinay 4334 }
742    
743 caltinay 3691 void Brick::dump(const string& fileName) const
744     {
745     #if USE_SILO
746     string fn(fileName);
747     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
748     fn+=".silo";
749     }
750    
751     int driver=DB_HDF5;
752     string siloPath;
753     DBfile* dbfile = NULL;
754    
755     #ifdef ESYS_MPI
756     PMPIO_baton_t* baton = NULL;
757 gross 3793 const int NUM_SILO_FILES = 1;
758     const char* blockDirFmt = "/block%04d";
759 caltinay 3691 #endif
760    
761     if (m_mpiInfo->size > 1) {
762     #ifdef ESYS_MPI
763     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
764     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
765     PMPIO_DefaultClose, (void*)&driver);
766     // try the fallback driver in case of error
767     if (!baton && driver != DB_PDB) {
768     driver = DB_PDB;
769     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
770     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
771     PMPIO_DefaultClose, (void*)&driver);
772     }
773     if (baton) {
774     char str[64];
775     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
776     siloPath = str;
777     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
778     }
779     #endif
780     } else {
781     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
782     getDescription().c_str(), driver);
783     // try the fallback driver in case of error
784     if (!dbfile && driver != DB_PDB) {
785     driver = DB_PDB;
786     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
787     getDescription().c_str(), driver);
788     }
789     }
790    
791     if (!dbfile)
792     throw RipleyException("dump: Could not create Silo file");
793    
794     /*
795     if (driver==DB_HDF5) {
796     // gzip level 1 already provides good compression with minimal
797     // performance penalty. Some tests showed that gzip levels >3 performed
798     // rather badly on escript data both in terms of time and space
799     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
800     }
801     */
802    
803 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
804     boost::scoped_ptr<double> y(new double[m_NN[1]]);
805     boost::scoped_ptr<double> z(new double[m_NN[2]]);
806 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
807     #pragma omp parallel
808     {
809     #pragma omp for
810 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
811     coords[0][i0]=getLocalCoordinate(i0, 0);
812 caltinay 3691 }
813     #pragma omp for
814 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
815     coords[1][i1]=getLocalCoordinate(i1, 1);
816 caltinay 3691 }
817     #pragma omp for
818 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
819     coords[2][i2]=getLocalCoordinate(i2, 2);
820 caltinay 3691 }
821     }
822 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
823    
824     // write mesh
825     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
826 caltinay 3691 DB_COLLINEAR, NULL);
827    
828 caltinay 4334 // write node ids
829     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
830 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
831 caltinay 3691
832 caltinay 3698 // write element ids
833 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
834 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
835 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
836 caltinay 3698
837     // rank 0 writes multimesh and multivar
838 caltinay 3691 if (m_mpiInfo->rank == 0) {
839     vector<string> tempstrings;
840     vector<char*> names;
841     for (dim_t i=0; i<m_mpiInfo->size; i++) {
842     stringstream path;
843     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
844     tempstrings.push_back(path.str());
845     names.push_back((char*)tempstrings.back().c_str());
846     }
847     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
848     DBSetDir(dbfile, "/");
849     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
850     &types[0], NULL);
851     tempstrings.clear();
852     names.clear();
853     for (dim_t i=0; i<m_mpiInfo->size; i++) {
854     stringstream path;
855     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
856     tempstrings.push_back(path.str());
857     names.push_back((char*)tempstrings.back().c_str());
858     }
859     types.assign(m_mpiInfo->size, DB_QUADVAR);
860     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
861     &types[0], NULL);
862 caltinay 3698 tempstrings.clear();
863     names.clear();
864     for (dim_t i=0; i<m_mpiInfo->size; i++) {
865     stringstream path;
866     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
867     tempstrings.push_back(path.str());
868     names.push_back((char*)tempstrings.back().c_str());
869     }
870     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
871     &types[0], NULL);
872 caltinay 3691 }
873    
874     if (m_mpiInfo->size > 1) {
875     #ifdef ESYS_MPI
876     PMPIO_HandOffBaton(baton, dbfile);
877     PMPIO_Finish(baton);
878     #endif
879     } else {
880     DBClose(dbfile);
881     }
882    
883     #else // USE_SILO
884 caltinay 3791 throw RipleyException("dump: no Silo support");
885 caltinay 3691 #endif
886     }
887    
888     const int* Brick::borrowSampleReferenceIDs(int fsType) const
889     {
890 caltinay 3697 switch (fsType) {
891     case Nodes:
892 caltinay 3748 case ReducedNodes: //FIXME: reduced
893 caltinay 3753 return &m_nodeId[0];
894 caltinay 3757 case DegreesOfFreedom:
895     case ReducedDegreesOfFreedom: //FIXME: reduced
896 caltinay 3753 return &m_dofId[0];
897 caltinay 3697 case Elements:
898 caltinay 3733 case ReducedElements:
899 caltinay 3697 return &m_elementId[0];
900 caltinay 3757 case FaceElements:
901 caltinay 3733 case ReducedFaceElements:
902 caltinay 3697 return &m_faceId[0];
903 sshaw 4660 case Points:
904     return &m_diracPointNodeIDs[0];
905 caltinay 3697 default:
906     break;
907     }
908 caltinay 3691
909 caltinay 3697 stringstream msg;
910 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
911 caltinay 3697 throw RipleyException(msg.str());
912 caltinay 3691 }
913    
914 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
915 caltinay 3691 {
916 caltinay 3759 if (getMPISize()==1)
917     return true;
918    
919 caltinay 3757 switch (fsType) {
920     case Nodes:
921     case ReducedNodes: //FIXME: reduced
922     return (m_dofMap[id] < getNumDOF());
923     case DegreesOfFreedom:
924     case ReducedDegreesOfFreedom:
925     return true;
926     case Elements:
927     case ReducedElements:
928     {
929     // check ownership of element's _last_ node
930 caltinay 4334 const index_t x=id%m_NE[0] + 1;
931     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
932     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
933     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
934 caltinay 3757 }
935     case FaceElements:
936     case ReducedFaceElements:
937 caltinay 3759 {
938     // check ownership of face element's last node
939     dim_t n=0;
940 caltinay 4334 for (size_t i=0; i<6; i++) {
941     n+=m_faceCount[i];
942 caltinay 3759 if (id<n) {
943 caltinay 4334 const index_t j=id-n+m_faceCount[i];
944 caltinay 3759 if (i>=4) { // front or back
945 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
946     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
947 caltinay 3759 } else if (i>=2) { // bottom or top
948 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
949     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
950 caltinay 3759 } else { // left or right
951 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
952     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
953 caltinay 3759 }
954     }
955     }
956     return false;
957     }
958 caltinay 3757 default:
959     break;
960 caltinay 3753 }
961 caltinay 3757
962     stringstream msg;
963 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
964 caltinay 3757 throw RipleyException(msg.str());
965 caltinay 3691 }
966    
967 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
968 caltinay 3703 {
969 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
970     out.requireWrite();
971     #pragma omp parallel
972     {
973     if (m_faceOffset[0] > -1) {
974     #pragma omp for nowait
975 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
976     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
977     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
978 caltinay 3764 // set vector at four quadrature points
979     *o++ = -1.; *o++ = 0.; *o++ = 0.;
980     *o++ = -1.; *o++ = 0.; *o++ = 0.;
981     *o++ = -1.; *o++ = 0.; *o++ = 0.;
982     *o++ = -1.; *o++ = 0.; *o = 0.;
983     }
984     }
985     }
986    
987     if (m_faceOffset[1] > -1) {
988     #pragma omp for nowait
989 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
990     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
991     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
992 caltinay 3764 // set vector at four quadrature points
993     *o++ = 1.; *o++ = 0.; *o++ = 0.;
994     *o++ = 1.; *o++ = 0.; *o++ = 0.;
995     *o++ = 1.; *o++ = 0.; *o++ = 0.;
996     *o++ = 1.; *o++ = 0.; *o = 0.;
997     }
998     }
999     }
1000    
1001     if (m_faceOffset[2] > -1) {
1002     #pragma omp for nowait
1003 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1004     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1005     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1006 caltinay 3764 // set vector at four quadrature points
1007     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1010     *o++ = 0.; *o++ = -1.; *o = 0.;
1011     }
1012     }
1013     }
1014    
1015     if (m_faceOffset[3] > -1) {
1016     #pragma omp for nowait
1017 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1018     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1019     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1020 caltinay 3764 // set vector at four quadrature points
1021     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1024     *o++ = 0.; *o++ = 1.; *o = 0.;
1025     }
1026     }
1027     }
1028    
1029     if (m_faceOffset[4] > -1) {
1030     #pragma omp for nowait
1031 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1032     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1033     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1034 caltinay 3764 // set vector at four quadrature points
1035     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1038     *o++ = 0.; *o++ = 0.; *o = -1.;
1039     }
1040     }
1041     }
1042    
1043     if (m_faceOffset[5] > -1) {
1044     #pragma omp for nowait
1045 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1046     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1047     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1048 caltinay 3764 // set vector at four quadrature points
1049     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1052     *o++ = 0.; *o++ = 0.; *o = 1.;
1053     }
1054     }
1055     }
1056     } // end of parallel section
1057     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1058     out.requireWrite();
1059     #pragma omp parallel
1060     {
1061     if (m_faceOffset[0] > -1) {
1062     #pragma omp for nowait
1063 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1064     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1065     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1066 caltinay 3764 *o++ = -1.;
1067     *o++ = 0.;
1068     *o = 0.;
1069     }
1070     }
1071     }
1072    
1073     if (m_faceOffset[1] > -1) {
1074     #pragma omp for nowait
1075 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1076     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1077     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1078 caltinay 3764 *o++ = 1.;
1079     *o++ = 0.;
1080     *o = 0.;
1081     }
1082     }
1083     }
1084    
1085     if (m_faceOffset[2] > -1) {
1086     #pragma omp for nowait
1087 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1088     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1089     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1090 caltinay 3764 *o++ = 0.;
1091     *o++ = -1.;
1092     *o = 0.;
1093     }
1094     }
1095     }
1096    
1097     if (m_faceOffset[3] > -1) {
1098     #pragma omp for nowait
1099 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1100     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1101     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1102 caltinay 3764 *o++ = 0.;
1103     *o++ = 1.;
1104     *o = 0.;
1105     }
1106     }
1107     }
1108    
1109     if (m_faceOffset[4] > -1) {
1110     #pragma omp for nowait
1111 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1112     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1113     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1114 caltinay 3764 *o++ = 0.;
1115     *o++ = 0.;
1116     *o = -1.;
1117     }
1118     }
1119     }
1120    
1121     if (m_faceOffset[5] > -1) {
1122     #pragma omp for nowait
1123 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1124     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1125     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1126 caltinay 3764 *o++ = 0.;
1127     *o++ = 0.;
1128     *o = 1.;
1129     }
1130     }
1131     }
1132     } // end of parallel section
1133    
1134     } else {
1135     stringstream msg;
1136 caltinay 3791 msg << "setToNormal: invalid function space type "
1137     << out.getFunctionSpace().getTypeCode();
1138 caltinay 3764 throw RipleyException(msg.str());
1139     }
1140     }
1141    
1142     void Brick::setToSize(escript::Data& out) const
1143     {
1144     if (out.getFunctionSpace().getTypeCode() == Elements
1145     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1146     out.requireWrite();
1147     const dim_t numQuad=out.getNumDataPointsPerSample();
1148 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1149 caltinay 3764 #pragma omp parallel for
1150     for (index_t k = 0; k < getNumElements(); ++k) {
1151     double* o = out.getSampleDataRW(k);
1152     fill(o, o+numQuad, size);
1153     }
1154     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1155     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1156     out.requireWrite();
1157     const dim_t numQuad=out.getNumDataPointsPerSample();
1158     #pragma omp parallel
1159     {
1160     if (m_faceOffset[0] > -1) {
1161 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1162 caltinay 3764 #pragma omp for nowait
1163 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1164     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1165     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1166 caltinay 3764 fill(o, o+numQuad, size);
1167     }
1168     }
1169     }
1170    
1171     if (m_faceOffset[1] > -1) {
1172 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1173 caltinay 3764 #pragma omp for nowait
1174 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1175     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1176     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1177 caltinay 3764 fill(o, o+numQuad, size);
1178     }
1179     }
1180     }
1181    
1182     if (m_faceOffset[2] > -1) {
1183 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1184 caltinay 3764 #pragma omp for nowait
1185 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1186     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1187     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1188 caltinay 3764 fill(o, o+numQuad, size);
1189     }
1190     }
1191     }
1192    
1193     if (m_faceOffset[3] > -1) {
1194 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1195 caltinay 3764 #pragma omp for nowait
1196 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1197     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1198     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1199 caltinay 3764 fill(o, o+numQuad, size);
1200     }
1201     }
1202     }
1203    
1204     if (m_faceOffset[4] > -1) {
1205 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1206 caltinay 3764 #pragma omp for nowait
1207 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1208     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1209     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1210 caltinay 3764 fill(o, o+numQuad, size);
1211     }
1212     }
1213     }
1214    
1215     if (m_faceOffset[5] > -1) {
1216 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1217 caltinay 3764 #pragma omp for nowait
1218 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1219     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1220     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1221 caltinay 3764 fill(o, o+numQuad, size);
1222     }
1223     }
1224     }
1225     } // end of parallel section
1226    
1227     } else {
1228     stringstream msg;
1229 caltinay 3791 msg << "setToSize: invalid function space type "
1230     << out.getFunctionSpace().getTypeCode();
1231 caltinay 3764 throw RipleyException(msg.str());
1232     }
1233     }
1234    
1235     void Brick::Print_Mesh_Info(const bool full) const
1236     {
1237     RipleyDomain::Print_Mesh_Info(full);
1238     if (full) {
1239     cout << " Id Coordinates" << endl;
1240     cout.precision(15);
1241     cout.setf(ios::scientific, ios::floatfield);
1242     for (index_t i=0; i < getNumNodes(); i++) {
1243     cout << " " << setw(5) << m_nodeId[i]
1244 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1245     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1246     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1247 caltinay 3764 }
1248     }
1249     }
1250    
1251    
1252     //protected
1253     void Brick::assembleCoordinates(escript::Data& arg) const
1254     {
1255     escriptDataC x = arg.getDataC();
1256     int numDim = m_numDim;
1257     if (!isDataPointShapeEqual(&x, 1, &numDim))
1258     throw RipleyException("setToX: Invalid Data object shape");
1259     if (!numSamplesEqual(&x, 1, getNumNodes()))
1260     throw RipleyException("setToX: Illegal number of samples in Data object");
1261    
1262     arg.requireWrite();
1263     #pragma omp parallel for
1264 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1265     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1266     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1267     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1268     point[0] = getLocalCoordinate(i0, 0);
1269     point[1] = getLocalCoordinate(i1, 1);
1270     point[2] = getLocalCoordinate(i2, 2);
1271 caltinay 3764 }
1272     }
1273     }
1274     }
1275    
1276     //protected
1277 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1278 caltinay 3764 {
1279 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1280 caltinay 3731 const double C0 = .044658198738520451079;
1281     const double C1 = .16666666666666666667;
1282     const double C2 = .21132486540518711775;
1283     const double C3 = .25;
1284     const double C4 = .5;
1285     const double C5 = .62200846792814621559;
1286     const double C6 = .78867513459481288225;
1287    
1288 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1289 caltinay 3760 out.requireWrite();
1290 caltinay 3913 #pragma omp parallel
1291     {
1292     vector<double> f_000(numComp);
1293     vector<double> f_001(numComp);
1294     vector<double> f_010(numComp);
1295     vector<double> f_011(numComp);
1296     vector<double> f_100(numComp);
1297     vector<double> f_101(numComp);
1298     vector<double> f_110(numComp);
1299     vector<double> f_111(numComp);
1300     #pragma omp for
1301 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1302     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1303     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1304     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1305     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1306     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1307     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1308     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1309     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1310     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1311     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1312     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1313 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1314 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1315     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1316     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1317     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1318     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1319     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1320     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1321     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1322     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1323     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1324     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1325     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1326 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1327     o[INDEX3(i,1,0,numComp,3)] = V4;
1328     o[INDEX3(i,2,0,numComp,3)] = V8;
1329     o[INDEX3(i,0,1,numComp,3)] = V0;
1330     o[INDEX3(i,1,1,numComp,3)] = V5;
1331     o[INDEX3(i,2,1,numComp,3)] = V9;
1332     o[INDEX3(i,0,2,numComp,3)] = V1;
1333     o[INDEX3(i,1,2,numComp,3)] = V4;
1334     o[INDEX3(i,2,2,numComp,3)] = V10;
1335     o[INDEX3(i,0,3,numComp,3)] = V1;
1336     o[INDEX3(i,1,3,numComp,3)] = V5;
1337     o[INDEX3(i,2,3,numComp,3)] = V11;
1338     o[INDEX3(i,0,4,numComp,3)] = V2;
1339     o[INDEX3(i,1,4,numComp,3)] = V6;
1340     o[INDEX3(i,2,4,numComp,3)] = V8;
1341     o[INDEX3(i,0,5,numComp,3)] = V2;
1342     o[INDEX3(i,1,5,numComp,3)] = V7;
1343     o[INDEX3(i,2,5,numComp,3)] = V9;
1344     o[INDEX3(i,0,6,numComp,3)] = V3;
1345     o[INDEX3(i,1,6,numComp,3)] = V6;
1346     o[INDEX3(i,2,6,numComp,3)] = V10;
1347     o[INDEX3(i,0,7,numComp,3)] = V3;
1348     o[INDEX3(i,1,7,numComp,3)] = V7;
1349     o[INDEX3(i,2,7,numComp,3)] = V11;
1350     } // end of component loop i
1351     } // end of k0 loop
1352     } // end of k1 loop
1353     } // end of k2 loop
1354     } // end of parallel section
1355 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1356 caltinay 3760 out.requireWrite();
1357 caltinay 3913 #pragma omp parallel
1358     {
1359     vector<double> f_000(numComp);
1360     vector<double> f_001(numComp);
1361     vector<double> f_010(numComp);
1362     vector<double> f_011(numComp);
1363     vector<double> f_100(numComp);
1364     vector<double> f_101(numComp);
1365     vector<double> f_110(numComp);
1366     vector<double> f_111(numComp);
1367     #pragma omp for
1368 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1369     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1370     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1371     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1378     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1379     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1380 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1381 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1382     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1383     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1384 caltinay 3913 } // end of component loop i
1385     } // end of k0 loop
1386     } // end of k1 loop
1387     } // end of k2 loop
1388     } // end of parallel section
1389 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1390 caltinay 3760 out.requireWrite();
1391 caltinay 3722 #pragma omp parallel
1392     {
1393 caltinay 3913 vector<double> f_000(numComp);
1394     vector<double> f_001(numComp);
1395     vector<double> f_010(numComp);
1396     vector<double> f_011(numComp);
1397     vector<double> f_100(numComp);
1398     vector<double> f_101(numComp);
1399     vector<double> f_110(numComp);
1400     vector<double> f_111(numComp);
1401 caltinay 3722 if (m_faceOffset[0] > -1) {
1402     #pragma omp for nowait
1403 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1404     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1405     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1406     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1407     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1408     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1409     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1410     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1411     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1412     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1413     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1414 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1415 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1416     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1417     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1418     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1419     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1420 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1421     o[INDEX3(i,2,0,numComp,3)] = V2;
1422 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1423 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1424     o[INDEX3(i,2,1,numComp,3)] = V3;
1425 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1426 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1427     o[INDEX3(i,2,2,numComp,3)] = V2;
1428 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1429 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1430     o[INDEX3(i,2,3,numComp,3)] = V3;
1431 caltinay 3764 } // end of component loop i
1432     } // end of k1 loop
1433     } // end of k2 loop
1434     } // end of face 0
1435 caltinay 3722 if (m_faceOffset[1] > -1) {
1436     #pragma omp for nowait
1437 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1438     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1439     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1445     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1446     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1447     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1448 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1449 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1450     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1451     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1452     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1453     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1454 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1455     o[INDEX3(i,2,0,numComp,3)] = V2;
1456 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1457 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1458     o[INDEX3(i,2,1,numComp,3)] = V3;
1459 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1460 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1461     o[INDEX3(i,2,2,numComp,3)] = V2;
1462 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1463 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1464     o[INDEX3(i,2,3,numComp,3)] = V3;
1465 caltinay 3764 } // end of component loop i
1466     } // end of k1 loop
1467     } // end of k2 loop
1468     } // end of face 1
1469 caltinay 3722 if (m_faceOffset[2] > -1) {
1470     #pragma omp for nowait
1471 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1472     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1473     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1479     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1480     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1481     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1482 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1483 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1484     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1485     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1486 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1487 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1488 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1489     o[INDEX3(i,0,1,numComp,3)] = V0;
1490 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1491 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1492     o[INDEX3(i,0,2,numComp,3)] = V0;
1493 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1494 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1495     o[INDEX3(i,0,3,numComp,3)] = V0;
1496 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1497 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1498 caltinay 3764 } // end of component loop i
1499     } // end of k0 loop
1500     } // end of k2 loop
1501     } // end of face 2
1502 caltinay 3722 if (m_faceOffset[3] > -1) {
1503     #pragma omp for nowait
1504 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1505     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1506     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1515 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1516 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1517     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1518     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1519     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1520 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1521 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1522 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1523     o[INDEX3(i,0,1,numComp,3)] = V0;
1524 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1525 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1526     o[INDEX3(i,0,2,numComp,3)] = V1;
1527 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1528 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1529     o[INDEX3(i,0,3,numComp,3)] = V1;
1530 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1531 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1532 caltinay 3764 } // end of component loop i
1533     } // end of k0 loop
1534     } // end of k2 loop
1535     } // end of face 3
1536 caltinay 3722 if (m_faceOffset[4] > -1) {
1537     #pragma omp for nowait
1538 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1539     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1540     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1549 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1550 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1551     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1552     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1553     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1554 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1555     o[INDEX3(i,1,0,numComp,3)] = V2;
1556 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1557 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1558     o[INDEX3(i,1,1,numComp,3)] = V3;
1559 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1560 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1561     o[INDEX3(i,1,2,numComp,3)] = V2;
1562 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1563 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1564     o[INDEX3(i,1,3,numComp,3)] = V3;
1565 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1566 caltinay 3764 } // end of component loop i
1567     } // end of k0 loop
1568     } // end of k1 loop
1569     } // end of face 4
1570 caltinay 3722 if (m_faceOffset[5] > -1) {
1571     #pragma omp for nowait
1572 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1573     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1574     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1582     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1583 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1584 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1585     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1586     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1587     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1588 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1589     o[INDEX3(i,1,0,numComp,3)] = V2;
1590 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1591 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1592     o[INDEX3(i,1,1,numComp,3)] = V3;
1593 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1594 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1595     o[INDEX3(i,1,2,numComp,3)] = V2;
1596 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]