/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4941 - (hide annotations)
Thu May 15 01:49:48 2014 UTC (4 years, 11 months ago) by caltinay
File size: 164926 byte(s)
first proof of concept - self-contained ripley system matrix with diagonal
storage, CG solver, no preconditioner, no MPI, no blocks, quite a few
quick'n'dirty hacks.
Solves poisson faster than paso :-)


1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17 sshaw 4861 #include <limits>
18    
19 caltinay 3691 #include <ripley/Brick.h>
20 caltinay 4334 #include <esysUtils/esysFileWriter.h>
21 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
22 sshaw 4629 #include <ripley/WaveAssembler3D.h>
23 sshaw 4712 #include <ripley/LameAssembler3D.h>
24     #include <ripley/domainhelpers.h>
25 caltinay 4477 #include <boost/scoped_array.hpp>
26    
27 caltinay 4013 #ifdef USE_NETCDF
28     #include <netcdfcpp.h>
29     #endif
30    
31 caltinay 3691 #if USE_SILO
32     #include <silo.h>
33     #ifdef ESYS_MPI
34     #include <pmpio.h>
35     #endif
36     #endif
37    
38     #include <iomanip>
39    
40 jfenwick 4650 #include "esysUtils/EsysRandom.h"
41     #include "blocktools.h"
42    
43    
44 caltinay 3691 using namespace std;
45 caltinay 4334 using esysUtils::FileWriter;
46 caltinay 3691
47     namespace ripley {
48    
49 sshaw 4712 int indexOfMax(int a, int b, int c) {
50     if (a > b) {
51     if (c > a) {
52     return 2;
53     }
54     return 0;
55     } else if (b > c) {
56     return 1;
57     }
58     return 2;
59     }
60    
61 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
62 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
63     const std::vector<double>& points, const std::vector<int>& tags,
64 jfenwick 4934 const simap_t& tagnamestonums,
65     escript::SubWorld_ptr w) :
66     RipleyDomain(3, w)
67 caltinay 3691 {
68 sshaw 4861 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
69     * static_cast<long>(n2 + 1) > std::numeric_limits<int>::max())
70     throw RipleyException("The number of elements has overflowed, this "
71     "limit may be raised in future releases.");
72    
73 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
74     throw RipleyException("Number of elements in each spatial dimension "
75     "must be positive");
76    
77 caltinay 3943 // ignore subdivision parameters for serial run
78     if (m_mpiInfo->size == 1) {
79     d0=1;
80     d1=1;
81     d2=1;
82     }
83     bool warn=false;
84 sshaw 4712
85     std::vector<int> factors;
86     int ranks = m_mpiInfo->size;
87     int epr[3] = {n0,n1,n2};
88     int d[3] = {d0,d1,d2};
89     if (d0<=0 || d1<=0 || d2<=0) {
90     for (int i = 0; i < 3; i++) {
91     if (d[i] < 1) {
92     d[i] = 1;
93     continue;
94 caltinay 3943 }
95 sshaw 4712 epr[i] = -1; // can no longer be max
96     if (ranks % d[i] != 0) {
97     throw RipleyException("Invalid number of spatial subdivisions");
98 caltinay 3943 }
99 sshaw 4712 //remove
100     ranks /= d[i];
101 caltinay 3943 }
102 sshaw 4712 factorise(factors, ranks);
103     if (factors.size() != 0) {
104     warn = true;
105 caltinay 3943 }
106 sshaw 4712 }
107     while (factors.size() > 0) {
108     int i = indexOfMax(epr[0],epr[1],epr[2]);
109     int f = factors.back();
110     factors.pop_back();
111     d[i] *= f;
112     epr[i] /= f;
113 caltinay 3943 }
114 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
115 caltinay 3943
116 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
117     // among number of ranks
118 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
119 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
120 sshaw 4712 }
121 caltinay 3943 if (warn) {
122     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
123     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
124     }
125 caltinay 3691
126 caltinay 4334 double l0 = x1-x0;
127     double l1 = y1-y0;
128     double l2 = z1-z0;
129     m_dx[0] = l0/n0;
130     m_dx[1] = l1/n1;
131     m_dx[2] = l2/n2;
132    
133     if ((n0+1)%d0 > 0) {
134 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
135 caltinay 4334 l0=m_dx[0]*n0;
136 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
137 caltinay 4334 << n0 << ", l0=" << l0 << endl;
138 caltinay 3943 }
139 caltinay 4334 if ((n1+1)%d1 > 0) {
140 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
141 caltinay 4334 l1=m_dx[1]*n1;
142 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
143 caltinay 4334 << n1 << ", l1=" << l1 << endl;
144 caltinay 3943 }
145 caltinay 4334 if ((n2+1)%d2 > 0) {
146 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
147 caltinay 4334 l2=m_dx[2]*n2;
148 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
149 caltinay 4334 << n2 << ", l2=" << l2 << endl;
150 caltinay 3943 }
151    
152 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
153 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
154    
155 caltinay 4334 m_gNE[0] = n0;
156     m_gNE[1] = n1;
157     m_gNE[2] = n2;
158     m_origin[0] = x0;
159     m_origin[1] = y0;
160     m_origin[2] = z0;
161     m_length[0] = l0;
162     m_length[1] = l1;
163     m_length[2] = l2;
164     m_NX[0] = d0;
165     m_NX[1] = d1;
166     m_NX[2] = d2;
167    
168 caltinay 3753 // local number of elements (including overlap)
169 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
170     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
171     m_NE[0]++;
172     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
173     m_ownNE[0]--;
174 caltinay 3764
175 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
176     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
177     m_NE[1]++;
178     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
179     m_ownNE[1]--;
180 caltinay 3764
181 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
182     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
183     m_NE[2]++;
184     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
185     m_ownNE[2]--;
186 caltinay 3753
187     // local number of nodes
188 caltinay 4334 m_NN[0] = m_NE[0]+1;
189     m_NN[1] = m_NE[1]+1;
190     m_NN[2] = m_NE[2]+1;
191 caltinay 3753
192 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
193 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
194     if (m_offset[0] > 0)
195     m_offset[0]--;
196     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
197     if (m_offset[1] > 0)
198     m_offset[1]--;
199     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
200     if (m_offset[2] > 0)
201     m_offset[2]--;
202 caltinay 3753
203 caltinay 3691 populateSampleIds();
204 caltinay 3756 createPattern();
205 sshaw 4622
206     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
207 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
208     i != tagnamestonums.end(); i++) {
209     setTagMap(i->first, i->second);
210     }
211 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
212 caltinay 3691 }
213    
214    
215     Brick::~Brick()
216     {
217 sshaw 4622 delete assembler;
218 caltinay 3691 }
219    
220     string Brick::getDescription() const
221     {
222     return "ripley::Brick";
223     }
224    
225     bool Brick::operator==(const AbstractDomain& other) const
226     {
227 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
228     if (o) {
229     return (RipleyDomain::operator==(other) &&
230 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
231     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
232     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
233     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
234 caltinay 3744 }
235 caltinay 3691
236     return false;
237     }
238    
239 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
240 caltinay 4618 const ReaderParameters& params) const
241 caltinay 4013 {
242     #ifdef USE_NETCDF
243     // check destination function space
244     int myN0, myN1, myN2;
245     if (out.getFunctionSpace().getTypeCode() == Nodes) {
246 caltinay 4334 myN0 = m_NN[0];
247     myN1 = m_NN[1];
248     myN2 = m_NN[2];
249 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
250     out.getFunctionSpace().getTypeCode() == ReducedElements) {
251 caltinay 4334 myN0 = m_NE[0];
252     myN1 = m_NE[1];
253     myN2 = m_NE[2];
254 caltinay 4013 } else
255     throw RipleyException("readNcGrid(): invalid function space for output data object");
256    
257 caltinay 4615 if (params.first.size() != 3)
258 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
259    
260 caltinay 4615 if (params.numValues.size() != 3)
261 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
262    
263 caltinay 4615 if (params.multiplier.size() != 3)
264 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
265 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
266     if (params.multiplier[i]<1)
267 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
268    
269 caltinay 4013 // check file existence and size
270     NcFile f(filename.c_str(), NcFile::ReadOnly);
271     if (!f.is_valid())
272     throw RipleyException("readNcGrid(): cannot open file");
273    
274     NcVar* var = f.get_var(varname.c_str());
275     if (!var)
276     throw RipleyException("readNcGrid(): invalid variable name");
277    
278     // TODO: rank>0 data support
279     const int numComp = out.getDataPointSize();
280     if (numComp > 1)
281     throw RipleyException("readNcGrid(): only scalar data supported");
282    
283     const int dims = var->num_dims();
284 caltinay 4477 boost::scoped_array<long> edges(var->edges());
285 caltinay 4013
286     // is this a slice of the data object (dims!=3)?
287     // note the expected ordering of edges (as in numpy: z,y,x)
288 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
289     params.numValues[1] > edges[1] ||
290     params.numValues[0] > edges[2]))
291     || (dims==2 && params.numValues[2]>1)
292     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
293 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
294     }
295    
296     // check if this rank contributes anything
297 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
298     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
299     params.first[1] >= m_offset[1]+myN1 ||
300     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
301     params.first[2] >= m_offset[2]+myN2 ||
302     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
303 caltinay 4013 return;
304     }
305    
306     // now determine how much this rank has to write
307    
308     // first coordinates in data object to write to
309 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
310     const int first1 = max(0, params.first[1]-m_offset[1]);
311     const int first2 = max(0, params.first[2]-m_offset[2]);
312 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
313     int idx0 = max(0, m_offset[0]-params.first[0]);
314     int idx1 = max(0, m_offset[1]-params.first[1]);
315     int idx2 = max(0, m_offset[2]-params.first[2]);
316 caltinay 4277 // number of values to read
317 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
318     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
319     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
320 caltinay 4013
321 caltinay 4618 // make sure we read the right block if going backwards through file
322     if (params.reverse[0])
323     idx0 = edges[dims-1]-num0-idx0;
324     if (dims>1 && params.reverse[1])
325     idx1 = edges[dims-2]-num1-idx1;
326     if (dims>2 && params.reverse[2])
327     idx2 = edges[dims-3]-num2-idx2;
328    
329    
330 caltinay 4013 vector<double> values(num0*num1*num2);
331     if (dims==3) {
332     var->set_cur(idx2, idx1, idx0);
333     var->get(&values[0], num2, num1, num0);
334     } else if (dims==2) {
335     var->set_cur(idx1, idx0);
336     var->get(&values[0], num1, num0);
337     } else {
338     var->set_cur(idx0);
339     var->get(&values[0], num0);
340     }
341    
342     const int dpp = out.getNumDataPointsPerSample();
343     out.requireWrite();
344    
345 caltinay 4618 // helpers for reversing
346     const int x0 = (params.reverse[0] ? num0-1 : 0);
347     const int x_mult = (params.reverse[0] ? -1 : 1);
348     const int y0 = (params.reverse[1] ? num1-1 : 0);
349     const int y_mult = (params.reverse[1] ? -1 : 1);
350     const int z0 = (params.reverse[2] ? num2-1 : 0);
351     const int z_mult = (params.reverse[2] ? -1 : 1);
352    
353 caltinay 4013 for (index_t z=0; z<num2; z++) {
354     for (index_t y=0; y<num1; y++) {
355     #pragma omp parallel for
356     for (index_t x=0; x<num0; x++) {
357 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
358     +(first1+y*params.multiplier[1])*myN0
359     +(first2+z*params.multiplier[2])*myN0*myN1;
360 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
361     +(y0+y_mult*y)*num0
362     +(x0+x_mult*x);
363 caltinay 4174 if (!isnan(values[srcIndex])) {
364 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
365     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
366     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
367 caltinay 4277 const int dataIndex = baseIndex+m0
368     +m1*myN0
369     +m2*myN0*myN1;
370     double* dest = out.getSampleDataRW(dataIndex);
371     for (index_t q=0; q<dpp; q++) {
372     *dest++ = values[srcIndex];
373     }
374     }
375     }
376 caltinay 4174 }
377 caltinay 4013 }
378     }
379     }
380     }
381     #else
382     throw RipleyException("readNcGrid(): not compiled with netCDF support");
383     #endif
384     }
385    
386 sshaw 4738 #ifdef USE_BOOSTIO
387     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
388     const ReaderParameters& params) const
389     {
390     // the mapping is not universally correct but should work on our
391     // supported platforms
392     switch (params.dataType) {
393     case DATATYPE_INT32:
394     readBinaryGridZippedImpl<int>(out, filename, params);
395     break;
396     case DATATYPE_FLOAT32:
397     readBinaryGridZippedImpl<float>(out, filename, params);
398     break;
399     case DATATYPE_FLOAT64:
400     readBinaryGridZippedImpl<double>(out, filename, params);
401     break;
402     default:
403     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
404     }
405     }
406     #endif
407    
408 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
409 caltinay 4618 const ReaderParameters& params) const
410 caltinay 4334 {
411 caltinay 4495 // the mapping is not universally correct but should work on our
412     // supported platforms
413 caltinay 4615 switch (params.dataType) {
414 caltinay 4495 case DATATYPE_INT32:
415 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
416 caltinay 4495 break;
417     case DATATYPE_FLOAT32:
418 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
419 caltinay 4495 break;
420     case DATATYPE_FLOAT64:
421 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
422 caltinay 4495 break;
423     default:
424     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
425     }
426     }
427    
428     template<typename ValueType>
429     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
430 caltinay 4618 const ReaderParameters& params) const
431 caltinay 4495 {
432 caltinay 4334 // check destination function space
433     int myN0, myN1, myN2;
434     if (out.getFunctionSpace().getTypeCode() == Nodes) {
435     myN0 = m_NN[0];
436     myN1 = m_NN[1];
437     myN2 = m_NN[2];
438     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
439     out.getFunctionSpace().getTypeCode() == ReducedElements) {
440     myN0 = m_NE[0];
441     myN1 = m_NE[1];
442     myN2 = m_NE[2];
443     } else
444     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
445    
446 caltinay 4615 if (params.first.size() != 3)
447 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
448    
449 caltinay 4615 if (params.numValues.size() != 3)
450 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
451    
452 caltinay 4615 if (params.multiplier.size() != 3)
453 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
454 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
455     if (params.multiplier[i]<1)
456 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
457    
458     // check file existence and size
459     ifstream f(filename.c_str(), ifstream::binary);
460     if (f.fail()) {
461     throw RipleyException("readBinaryGrid(): cannot open file");
462     }
463     f.seekg(0, ios::end);
464     const int numComp = out.getDataPointSize();
465     const int filesize = f.tellg();
466 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 caltinay 4334 if (filesize < reqsize) {
468     f.close();
469     throw RipleyException("readBinaryGrid(): not enough data in file");
470     }
471    
472     // check if this rank contributes anything
473 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
474     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475     params.first[1] >= m_offset[1]+myN1 ||
476     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477     params.first[2] >= m_offset[2]+myN2 ||
478     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 caltinay 4334 f.close();
480     return;
481     }
482    
483     // now determine how much this rank has to write
484    
485     // first coordinates in data object to write to
486 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
487     const int first1 = max(0, params.first[1]-m_offset[1]);
488     const int first2 = max(0, params.first[2]-m_offset[2]);
489 caltinay 4334 // indices to first value in file
490 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
491     const int idx1 = max(0, m_offset[1]-params.first[1]);
492     const int idx2 = max(0, m_offset[2]-params.first[2]);
493 caltinay 4334 // number of values to read
494 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
495     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
496     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
497 caltinay 4334
498     out.requireWrite();
499 caltinay 4495 vector<ValueType> values(num0*numComp);
500 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
501    
502 caltinay 4529 for (int z=0; z<num2; z++) {
503     for (int y=0; y<num1; y++) {
504 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
505     +(idx2+z)*params.numValues[0]*params.numValues[1]);
506 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
507     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
508 caltinay 4334
509 caltinay 4529 for (int x=0; x<num0; x++) {
510 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
511     +(first1+y*params.multiplier[1])*myN0
512     +(first2+z*params.multiplier[2])*myN0*myN1;
513     for (int m2=0; m2<params.multiplier[2]; m2++) {
514     for (int m1=0; m1<params.multiplier[1]; m1++) {
515     for (int m0=0; m0<params.multiplier[0]; m0++) {
516 caltinay 4334 const int dataIndex = baseIndex+m0
517     +m1*myN0
518     +m2*myN0*myN1;
519     double* dest = out.getSampleDataRW(dataIndex);
520 caltinay 4529 for (int c=0; c<numComp; c++) {
521     ValueType val = values[x*numComp+c];
522    
523 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
524 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
525     // this will alter val!!
526     byte_swap32(cval);
527     }
528     if (!std::isnan(val)) {
529     for (int q=0; q<dpp; q++) {
530     *dest++ = static_cast<double>(val);
531 caltinay 4334 }
532     }
533     }
534     }
535     }
536     }
537     }
538     }
539     }
540    
541     f.close();
542     }
543    
544 sshaw 4738 #ifdef USE_BOOSTIO
545     template<typename ValueType>
546     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
547     const ReaderParameters& params) const
548     {
549     // check destination function space
550     int myN0, myN1, myN2;
551     if (out.getFunctionSpace().getTypeCode() == Nodes) {
552     myN0 = m_NN[0];
553     myN1 = m_NN[1];
554     myN2 = m_NN[2];
555     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
556     out.getFunctionSpace().getTypeCode() == ReducedElements) {
557     myN0 = m_NE[0];
558     myN1 = m_NE[1];
559     myN2 = m_NE[2];
560     } else
561     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
562    
563     if (params.first.size() != 3)
564     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
565    
566     if (params.numValues.size() != 3)
567     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
568    
569     if (params.multiplier.size() != 3)
570     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
571     for (size_t i=0; i<params.multiplier.size(); i++)
572     if (params.multiplier[i]<1)
573     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
574    
575     // check file existence and size
576     ifstream f(filename.c_str(), ifstream::binary);
577     if (f.fail()) {
578     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
579     }
580     f.seekg(0, ios::end);
581     const int numComp = out.getDataPointSize();
582     int filesize = f.tellg();
583     f.seekg(0, ios::beg);
584     std::vector<char> compressed(filesize);
585     f.read((char*)&compressed[0], filesize);
586     f.close();
587     std::vector<char> decompressed = unzip(compressed);
588     filesize = decompressed.size();
589     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
590     if (filesize < reqsize) {
591     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
592     }
593    
594     // check if this rank contributes anything
595     if (params.first[0] >= m_offset[0]+myN0 ||
596     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
597     params.first[1] >= m_offset[1]+myN1 ||
598     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
599     params.first[2] >= m_offset[2]+myN2 ||
600     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
601     return;
602     }
603    
604     // now determine how much this rank has to write
605    
606     // first coordinates in data object to write to
607     const int first0 = max(0, params.first[0]-m_offset[0]);
608     const int first1 = max(0, params.first[1]-m_offset[1]);
609     const int first2 = max(0, params.first[2]-m_offset[2]);
610     // indices to first value in file
611     const int idx0 = max(0, m_offset[0]-params.first[0]);
612     const int idx1 = max(0, m_offset[1]-params.first[1]);
613     const int idx2 = max(0, m_offset[2]-params.first[2]);
614     // number of values to read
615     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
616     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
617     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
618    
619     out.requireWrite();
620     vector<ValueType> values(num0*numComp);
621     const int dpp = out.getNumDataPointsPerSample();
622    
623     for (int z=0; z<num2; z++) {
624     for (int y=0; y<num1; y++) {
625     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
626     +(idx2+z)*params.numValues[0]*params.numValues[1]);
627     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
628    
629     for (int x=0; x<num0; x++) {
630     const int baseIndex = first0+x*params.multiplier[0]
631     +(first1+y*params.multiplier[1])*myN0
632     +(first2+z*params.multiplier[2])*myN0*myN1;
633     for (int m2=0; m2<params.multiplier[2]; m2++) {
634     for (int m1=0; m1<params.multiplier[1]; m1++) {
635     for (int m0=0; m0<params.multiplier[0]; m0++) {
636     const int dataIndex = baseIndex+m0
637     +m1*myN0
638     +m2*myN0*myN1;
639     double* dest = out.getSampleDataRW(dataIndex);
640     for (int c=0; c<numComp; c++) {
641     ValueType val = values[x*numComp+c];
642    
643     if (params.byteOrder != BYTEORDER_NATIVE) {
644     char* cval = reinterpret_cast<char*>(&val);
645     // this will alter val!!
646     byte_swap32(cval);
647     }
648     if (!std::isnan(val)) {
649     for (int q=0; q<dpp; q++) {
650     *dest++ = static_cast<double>(val);
651     }
652     }
653     }
654     }
655     }
656     }
657     }
658     }
659     }
660     }
661     #endif
662    
663 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
664     int byteOrder, int dataType) const
665 caltinay 4334 {
666 caltinay 4357 // the mapping is not universally correct but should work on our
667     // supported platforms
668     switch (dataType) {
669     case DATATYPE_INT32:
670     writeBinaryGridImpl<int>(in, filename, byteOrder);
671     break;
672     case DATATYPE_FLOAT32:
673     writeBinaryGridImpl<float>(in, filename, byteOrder);
674     break;
675     case DATATYPE_FLOAT64:
676     writeBinaryGridImpl<double>(in, filename, byteOrder);
677     break;
678     default:
679     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
680     }
681     }
682    
683     template<typename ValueType>
684     void Brick::writeBinaryGridImpl(const escript::Data& in,
685     const string& filename, int byteOrder) const
686     {
687 caltinay 4334 // check function space and determine number of points
688     int myN0, myN1, myN2;
689     int totalN0, totalN1, totalN2;
690     if (in.getFunctionSpace().getTypeCode() == Nodes) {
691     myN0 = m_NN[0];
692     myN1 = m_NN[1];
693     myN2 = m_NN[2];
694     totalN0 = m_gNE[0]+1;
695     totalN1 = m_gNE[1]+1;
696     totalN2 = m_gNE[2]+1;
697     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
698     in.getFunctionSpace().getTypeCode() == ReducedElements) {
699     myN0 = m_NE[0];
700     myN1 = m_NE[1];
701     myN2 = m_NE[2];
702     totalN0 = m_gNE[0];
703     totalN1 = m_gNE[1];
704     totalN2 = m_gNE[2];
705     } else
706     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
707    
708     const int numComp = in.getDataPointSize();
709     const int dpp = in.getNumDataPointsPerSample();
710 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
711 caltinay 4334
712     if (numComp > 1 || dpp > 1)
713     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
714    
715     // from here on we know that each sample consists of one value
716 caltinay 4482 FileWriter fw;
717     fw.openFile(filename, fileSize);
718 caltinay 4334 MPIBarrier();
719    
720     for (index_t z=0; z<myN2; z++) {
721     for (index_t y=0; y<myN1; y++) {
722     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
723 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
724 caltinay 4334 ostringstream oss;
725    
726     for (index_t x=0; x<myN0; x++) {
727 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
728 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
729     if (byteOrder == BYTEORDER_NATIVE) {
730 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
731     } else {
732     char* value = reinterpret_cast<char*>(&fvalue);
733 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
734 caltinay 4334 }
735     }
736 caltinay 4482 fw.writeAt(oss, fileofs);
737 caltinay 4334 }
738     }
739 caltinay 4482 fw.close();
740 caltinay 4334 }
741    
742 caltinay 3691 void Brick::dump(const string& fileName) const
743     {
744     #if USE_SILO
745     string fn(fileName);
746     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
747     fn+=".silo";
748     }
749    
750     int driver=DB_HDF5;
751     string siloPath;
752     DBfile* dbfile = NULL;
753    
754     #ifdef ESYS_MPI
755     PMPIO_baton_t* baton = NULL;
756 gross 3793 const int NUM_SILO_FILES = 1;
757     const char* blockDirFmt = "/block%04d";
758 caltinay 3691 #endif
759    
760     if (m_mpiInfo->size > 1) {
761     #ifdef ESYS_MPI
762     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
763     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
764     PMPIO_DefaultClose, (void*)&driver);
765     // try the fallback driver in case of error
766     if (!baton && driver != DB_PDB) {
767     driver = DB_PDB;
768     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
769     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
770     PMPIO_DefaultClose, (void*)&driver);
771     }
772     if (baton) {
773     char str[64];
774     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
775     siloPath = str;
776     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
777     }
778     #endif
779     } else {
780     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
781     getDescription().c_str(), driver);
782     // try the fallback driver in case of error
783     if (!dbfile && driver != DB_PDB) {
784     driver = DB_PDB;
785     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
786     getDescription().c_str(), driver);
787     }
788     }
789    
790     if (!dbfile)
791     throw RipleyException("dump: Could not create Silo file");
792    
793     /*
794     if (driver==DB_HDF5) {
795     // gzip level 1 already provides good compression with minimal
796     // performance penalty. Some tests showed that gzip levels >3 performed
797     // rather badly on escript data both in terms of time and space
798     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
799     }
800     */
801    
802 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
803     boost::scoped_ptr<double> y(new double[m_NN[1]]);
804     boost::scoped_ptr<double> z(new double[m_NN[2]]);
805 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
806     #pragma omp parallel
807     {
808     #pragma omp for
809 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
810     coords[0][i0]=getLocalCoordinate(i0, 0);
811 caltinay 3691 }
812     #pragma omp for
813 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
814     coords[1][i1]=getLocalCoordinate(i1, 1);
815 caltinay 3691 }
816     #pragma omp for
817 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
818     coords[2][i2]=getLocalCoordinate(i2, 2);
819 caltinay 3691 }
820     }
821 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
822    
823     // write mesh
824     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
825 caltinay 3691 DB_COLLINEAR, NULL);
826    
827 caltinay 4334 // write node ids
828     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
829 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
830 caltinay 3691
831 caltinay 3698 // write element ids
832 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
833 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
834 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
835 caltinay 3698
836     // rank 0 writes multimesh and multivar
837 caltinay 3691 if (m_mpiInfo->rank == 0) {
838     vector<string> tempstrings;
839     vector<char*> names;
840     for (dim_t i=0; i<m_mpiInfo->size; i++) {
841     stringstream path;
842     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
843     tempstrings.push_back(path.str());
844     names.push_back((char*)tempstrings.back().c_str());
845     }
846     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
847     DBSetDir(dbfile, "/");
848     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
849     &types[0], NULL);
850     tempstrings.clear();
851     names.clear();
852     for (dim_t i=0; i<m_mpiInfo->size; i++) {
853     stringstream path;
854     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
855     tempstrings.push_back(path.str());
856     names.push_back((char*)tempstrings.back().c_str());
857     }
858     types.assign(m_mpiInfo->size, DB_QUADVAR);
859     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
860     &types[0], NULL);
861 caltinay 3698 tempstrings.clear();
862     names.clear();
863     for (dim_t i=0; i<m_mpiInfo->size; i++) {
864     stringstream path;
865     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
866     tempstrings.push_back(path.str());
867     names.push_back((char*)tempstrings.back().c_str());
868     }
869     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
870     &types[0], NULL);
871 caltinay 3691 }
872    
873     if (m_mpiInfo->size > 1) {
874     #ifdef ESYS_MPI
875     PMPIO_HandOffBaton(baton, dbfile);
876     PMPIO_Finish(baton);
877     #endif
878     } else {
879     DBClose(dbfile);
880     }
881    
882     #else // USE_SILO
883 caltinay 3791 throw RipleyException("dump: no Silo support");
884 caltinay 3691 #endif
885     }
886    
887     const int* Brick::borrowSampleReferenceIDs(int fsType) const
888     {
889 caltinay 3697 switch (fsType) {
890     case Nodes:
891 caltinay 3748 case ReducedNodes: //FIXME: reduced
892 caltinay 3753 return &m_nodeId[0];
893 caltinay 3757 case DegreesOfFreedom:
894     case ReducedDegreesOfFreedom: //FIXME: reduced
895 caltinay 3753 return &m_dofId[0];
896 caltinay 3697 case Elements:
897 caltinay 3733 case ReducedElements:
898 caltinay 3697 return &m_elementId[0];
899 caltinay 3757 case FaceElements:
900 caltinay 3733 case ReducedFaceElements:
901 caltinay 3697 return &m_faceId[0];
902 sshaw 4660 case Points:
903     return &m_diracPointNodeIDs[0];
904 caltinay 3697 default:
905     break;
906     }
907 caltinay 3691
908 caltinay 3697 stringstream msg;
909 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
910 caltinay 3697 throw RipleyException(msg.str());
911 caltinay 3691 }
912    
913 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
914 caltinay 3691 {
915 caltinay 3759 if (getMPISize()==1)
916     return true;
917    
918 caltinay 3757 switch (fsType) {
919     case Nodes:
920     case ReducedNodes: //FIXME: reduced
921     return (m_dofMap[id] < getNumDOF());
922     case DegreesOfFreedom:
923     case ReducedDegreesOfFreedom:
924     return true;
925     case Elements:
926     case ReducedElements:
927     {
928     // check ownership of element's _last_ node
929 caltinay 4334 const index_t x=id%m_NE[0] + 1;
930     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
931     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
932     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
933 caltinay 3757 }
934     case FaceElements:
935     case ReducedFaceElements:
936 caltinay 3759 {
937     // check ownership of face element's last node
938     dim_t n=0;
939 caltinay 4334 for (size_t i=0; i<6; i++) {
940     n+=m_faceCount[i];
941 caltinay 3759 if (id<n) {
942 caltinay 4334 const index_t j=id-n+m_faceCount[i];
943 caltinay 3759 if (i>=4) { // front or back
944 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
945     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
946 caltinay 3759 } else if (i>=2) { // bottom or top
947 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
948     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
949 caltinay 3759 } else { // left or right
950 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
951     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
952 caltinay 3759 }
953     }
954     }
955     return false;
956     }
957 caltinay 3757 default:
958     break;
959 caltinay 3753 }
960 caltinay 3757
961     stringstream msg;
962 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
963 caltinay 3757 throw RipleyException(msg.str());
964 caltinay 3691 }
965    
966 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
967 caltinay 3703 {
968 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
969     out.requireWrite();
970     #pragma omp parallel
971     {
972     if (m_faceOffset[0] > -1) {
973     #pragma omp for nowait
974 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
976     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
977 caltinay 3764 // set vector at four quadrature points
978     *o++ = -1.; *o++ = 0.; *o++ = 0.;
979     *o++ = -1.; *o++ = 0.; *o++ = 0.;
980     *o++ = -1.; *o++ = 0.; *o++ = 0.;
981     *o++ = -1.; *o++ = 0.; *o = 0.;
982     }
983     }
984     }
985    
986     if (m_faceOffset[1] > -1) {
987     #pragma omp for nowait
988 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
989     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
990     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
991 caltinay 3764 // set vector at four quadrature points
992     *o++ = 1.; *o++ = 0.; *o++ = 0.;
993     *o++ = 1.; *o++ = 0.; *o++ = 0.;
994     *o++ = 1.; *o++ = 0.; *o++ = 0.;
995     *o++ = 1.; *o++ = 0.; *o = 0.;
996     }
997     }
998     }
999    
1000     if (m_faceOffset[2] > -1) {
1001     #pragma omp for nowait
1002 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1003     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1004     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1005 caltinay 3764 // set vector at four quadrature points
1006     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1007     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009     *o++ = 0.; *o++ = -1.; *o = 0.;
1010     }
1011     }
1012     }
1013    
1014     if (m_faceOffset[3] > -1) {
1015     #pragma omp for nowait
1016 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1017     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1018     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1019 caltinay 3764 // set vector at four quadrature points
1020     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1021     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023     *o++ = 0.; *o++ = 1.; *o = 0.;
1024     }
1025     }
1026     }
1027    
1028     if (m_faceOffset[4] > -1) {
1029     #pragma omp for nowait
1030 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1031     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1032     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1033 caltinay 3764 // set vector at four quadrature points
1034     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1035     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037     *o++ = 0.; *o++ = 0.; *o = -1.;
1038     }
1039     }
1040     }
1041    
1042     if (m_faceOffset[5] > -1) {
1043     #pragma omp for nowait
1044 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1045     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1046     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1047 caltinay 3764 // set vector at four quadrature points
1048     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1049     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051     *o++ = 0.; *o++ = 0.; *o = 1.;
1052     }
1053     }
1054     }
1055     } // end of parallel section
1056     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1057     out.requireWrite();
1058     #pragma omp parallel
1059     {
1060     if (m_faceOffset[0] > -1) {
1061     #pragma omp for nowait
1062 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1063     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1064     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1065 caltinay 3764 *o++ = -1.;
1066     *o++ = 0.;
1067     *o = 0.;
1068     }
1069     }
1070     }
1071    
1072     if (m_faceOffset[1] > -1) {
1073     #pragma omp for nowait
1074 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1075     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1076     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1077 caltinay 3764 *o++ = 1.;
1078     *o++ = 0.;
1079     *o = 0.;
1080     }
1081     }
1082     }
1083    
1084     if (m_faceOffset[2] > -1) {
1085     #pragma omp for nowait
1086 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1087     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1088     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1089 caltinay 3764 *o++ = 0.;
1090     *o++ = -1.;
1091     *o = 0.;
1092     }
1093     }
1094     }
1095    
1096     if (m_faceOffset[3] > -1) {
1097     #pragma omp for nowait
1098 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1099     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1100     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1101 caltinay 3764 *o++ = 0.;
1102     *o++ = 1.;
1103     *o = 0.;
1104     }
1105     }
1106     }
1107    
1108     if (m_faceOffset[4] > -1) {
1109     #pragma omp for nowait
1110 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1111     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1112     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1113 caltinay 3764 *o++ = 0.;
1114     *o++ = 0.;
1115     *o = -1.;
1116     }
1117     }
1118     }
1119    
1120     if (m_faceOffset[5] > -1) {
1121     #pragma omp for nowait
1122 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1123     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1124     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1125 caltinay 3764 *o++ = 0.;
1126     *o++ = 0.;
1127     *o = 1.;
1128     }
1129     }
1130     }
1131     } // end of parallel section
1132    
1133     } else {
1134     stringstream msg;
1135 caltinay 3791 msg << "setToNormal: invalid function space type "
1136     << out.getFunctionSpace().getTypeCode();
1137 caltinay 3764 throw RipleyException(msg.str());
1138     }
1139     }
1140    
1141     void Brick::setToSize(escript::Data& out) const
1142     {
1143     if (out.getFunctionSpace().getTypeCode() == Elements
1144     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1145     out.requireWrite();
1146     const dim_t numQuad=out.getNumDataPointsPerSample();
1147 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1148 caltinay 3764 #pragma omp parallel for
1149     for (index_t k = 0; k < getNumElements(); ++k) {
1150     double* o = out.getSampleDataRW(k);
1151     fill(o, o+numQuad, size);
1152     }
1153     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1154     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1155     out.requireWrite();
1156     const dim_t numQuad=out.getNumDataPointsPerSample();
1157     #pragma omp parallel
1158     {
1159     if (m_faceOffset[0] > -1) {
1160 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1161 caltinay 3764 #pragma omp for nowait
1162 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1163     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1164     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1165 caltinay 3764 fill(o, o+numQuad, size);
1166     }
1167     }
1168     }
1169    
1170     if (m_faceOffset[1] > -1) {
1171 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1172 caltinay 3764 #pragma omp for nowait
1173 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1174     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1175     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1176 caltinay 3764 fill(o, o+numQuad, size);
1177     }
1178     }
1179     }
1180    
1181     if (m_faceOffset[2] > -1) {
1182 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1183 caltinay 3764 #pragma omp for nowait
1184 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1185     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1186     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1187 caltinay 3764 fill(o, o+numQuad, size);
1188     }
1189     }
1190     }
1191    
1192     if (m_faceOffset[3] > -1) {
1193 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1194 caltinay 3764 #pragma omp for nowait
1195 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1196     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1197     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1198 caltinay 3764 fill(o, o+numQuad, size);
1199     }
1200     }
1201     }
1202    
1203     if (m_faceOffset[4] > -1) {
1204 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1205 caltinay 3764 #pragma omp for nowait
1206 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1207     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1208     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1209 caltinay 3764 fill(o, o+numQuad, size);
1210     }
1211     }
1212     }
1213    
1214     if (m_faceOffset[5] > -1) {
1215 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1216 caltinay 3764 #pragma omp for nowait
1217 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1218     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1219     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1220 caltinay 3764 fill(o, o+numQuad, size);
1221     }
1222     }
1223     }
1224     } // end of parallel section
1225    
1226     } else {
1227     stringstream msg;
1228 caltinay 3791 msg << "setToSize: invalid function space type "
1229     << out.getFunctionSpace().getTypeCode();
1230 caltinay 3764 throw RipleyException(msg.str());
1231     }
1232     }
1233    
1234     void Brick::Print_Mesh_Info(const bool full) const
1235     {
1236     RipleyDomain::Print_Mesh_Info(full);
1237     if (full) {
1238     cout << " Id Coordinates" << endl;
1239     cout.precision(15);
1240     cout.setf(ios::scientific, ios::floatfield);
1241     for (index_t i=0; i < getNumNodes(); i++) {
1242     cout << " " << setw(5) << m_nodeId[i]
1243 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1244     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1245     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1246 caltinay 3764 }
1247     }
1248     }
1249    
1250    
1251     //protected
1252     void Brick::assembleCoordinates(escript::Data& arg) const
1253     {
1254     escriptDataC x = arg.getDataC();
1255     int numDim = m_numDim;
1256     if (!isDataPointShapeEqual(&x, 1, &numDim))
1257     throw RipleyException("setToX: Invalid Data object shape");
1258     if (!numSamplesEqual(&x, 1, getNumNodes()))
1259     throw RipleyException("setToX: Illegal number of samples in Data object");
1260    
1261     arg.requireWrite();
1262     #pragma omp parallel for
1263 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1264     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1265     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1266     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1267     point[0] = getLocalCoordinate(i0, 0);
1268     point[1] = getLocalCoordinate(i1, 1);
1269     point[2] = getLocalCoordinate(i2, 2);
1270 caltinay 3764 }
1271     }
1272     }
1273     }
1274    
1275     //protected
1276 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1277 caltinay 3764 {
1278 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1279 caltinay 3731 const double C0 = .044658198738520451079;
1280     const double C1 = .16666666666666666667;
1281     const double C2 = .21132486540518711775;
1282     const double C3 = .25;
1283     const double C4 = .5;
1284     const double C5 = .62200846792814621559;
1285     const double C6 = .78867513459481288225;
1286    
1287 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1288 caltinay 3760 out.requireWrite();
1289 caltinay 3913 #pragma omp parallel
1290     {
1291     vector<double> f_000(numComp);
1292     vector<double> f_001(numComp);
1293     vector<double> f_010(numComp);
1294     vector<double> f_011(numComp);
1295     vector<double> f_100(numComp);
1296     vector<double> f_101(numComp);
1297     vector<double> f_110(numComp);
1298     vector<double> f_111(numComp);
1299     #pragma omp for
1300 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1301     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1302     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1303     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1304     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1305     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1306     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1307     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1308     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1309     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1310     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1311     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1312 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1313 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1314     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1315     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1316     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1317     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1318     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1319     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1320     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1321     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1322     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1323     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1324     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1325 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1326     o[INDEX3(i,1,0,numComp,3)] = V4;
1327     o[INDEX3(i,2,0,numComp,3)] = V8;
1328     o[INDEX3(i,0,1,numComp,3)] = V0;
1329     o[INDEX3(i,1,1,numComp,3)] = V5;
1330     o[INDEX3(i,2,1,numComp,3)] = V9;
1331     o[INDEX3(i,0,2,numComp,3)] = V1;
1332     o[INDEX3(i,1,2,numComp,3)] = V4;
1333     o[INDEX3(i,2,2,numComp,3)] = V10;
1334     o[INDEX3(i,0,3,numComp,3)] = V1;
1335     o[INDEX3(i,1,3,numComp,3)] = V5;
1336     o[INDEX3(i,2,3,numComp,3)] = V11;
1337     o[INDEX3(i,0,4,numComp,3)] = V2;
1338     o[INDEX3(i,1,4,numComp,3)] = V6;
1339     o[INDEX3(i,2,4,numComp,3)] = V8;
1340     o[INDEX3(i,0,5,numComp,3)] = V2;
1341     o[INDEX3(i,1,5,numComp,3)] = V7;
1342     o[INDEX3(i,2,5,numComp,3)] = V9;
1343     o[INDEX3(i,0,6,numComp,3)] = V3;
1344     o[INDEX3(i,1,6,numComp,3)] = V6;
1345     o[INDEX3(i,2,6,numComp,3)] = V10;
1346     o[INDEX3(i,0,7,numComp,3)] = V3;
1347     o[INDEX3(i,1,7,numComp,3)] = V7;
1348     o[INDEX3(i,2,7,numComp,3)] = V11;
1349     } // end of component loop i
1350     } // end of k0 loop
1351     } // end of k1 loop
1352     } // end of k2 loop
1353     } // end of parallel section
1354 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1355 caltinay 3760 out.requireWrite();
1356 caltinay 3913 #pragma omp parallel
1357     {
1358     vector<double> f_000(numComp);
1359     vector<double> f_001(numComp);
1360     vector<double> f_010(numComp);
1361     vector<double> f_011(numComp);
1362     vector<double> f_100(numComp);
1363     vector<double> f_101(numComp);
1364     vector<double> f_110(numComp);
1365     vector<double> f_111(numComp);
1366     #pragma omp for
1367 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1368     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1369     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1370     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1378     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1379 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1380 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1381     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1382     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1383 caltinay 3913 } // end of component loop i
1384     } // end of k0 loop
1385     } // end of k1 loop
1386     } // end of k2 loop
1387     } // end of parallel section
1388 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1389 caltinay 3760 out.requireWrite();
1390 caltinay 3722 #pragma omp parallel
1391     {
1392 caltinay 3913 vector<double> f_000(numComp);
1393     vector<double> f_001(numComp);
1394     vector<double> f_010(numComp);
1395     vector<double> f_011(numComp);
1396     vector<double> f_100(numComp);
1397     vector<double> f_101(numComp);
1398     vector<double> f_110(numComp);
1399     vector<double> f_111(numComp);
1400 caltinay 3722 if (m_faceOffset[0] > -1) {
1401     #pragma omp for nowait
1402 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1403     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1404     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1405     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1406     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1407     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1408     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1409     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1410     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1411     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1412     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1413 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1414 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1415     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1416     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1417     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1418     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1419 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1420     o[INDEX3(i,2,0,numComp,3)] = V2;
1421 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1422 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1423     o[INDEX3(i,2,1,numComp,3)] = V3;
1424 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1425 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1426     o[INDEX3(i,2,2,numComp,3)] = V2;
1427 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1428 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1429     o[INDEX3(i,2,3,numComp,3)] = V3;
1430 caltinay 3764 } // end of component loop i
1431     } // end of k1 loop
1432     } // end of k2 loop
1433     } // end of face 0
1434 caltinay 3722 if (m_faceOffset[1] > -1) {
1435     #pragma omp for nowait
1436 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1437     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1438     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1445     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1446     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1447 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1448 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1449     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1450     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1451     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1452     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1453 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1454     o[INDEX3(i,2,0,numComp,3)] = V2;
1455 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1456 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1457     o[INDEX3(i,2,1,numComp,3)] = V3;
1458 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1459 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1460     o[INDEX3(i,2,2,numComp,3)] = V2;
1461 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1462 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1463     o[INDEX3(i,2,3,numComp,3)] = V3;
1464 caltinay 3764 } // end of component loop i
1465     } // end of k1 loop
1466     } // end of k2 loop
1467     } // end of face 1
1468 caltinay 3722 if (m_faceOffset[2] > -1) {
1469     #pragma omp for nowait
1470 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1471     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1472     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1479     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1480     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1481 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1482 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1483     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1484     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1485 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1486 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1487 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1488     o[INDEX3(i,0,1,numComp,3)] = V0;
1489 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1490 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1491     o[INDEX3(i,0,2,numComp,3)] = V0;
1492 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1493 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1494     o[INDEX3(i,0,3,numComp,3)] = V0;
1495 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1496 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1497 caltinay 3764 } // end of component loop i
1498     } // end of k0 loop
1499     } // end of k2 loop
1500     } // end of face 2
1501 caltinay 3722 if (m_faceOffset[3] > -1) {
1502     #pragma omp for nowait
1503 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1504     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1505     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1514 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1515 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1516     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1517     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1518     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1519 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1520 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1521 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1522     o[INDEX3(i,0,1,numComp,3)] = V0;
1523 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1524 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1525     o[INDEX3(i,0,2,numComp,3)] = V1;
1526 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1527 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1528     o[INDEX3(i,0,3,numComp,3)] = V1;
1529 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1530 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1531 caltinay 3764 } // end of component loop i
1532     } // end of k0 loop
1533     } // end of k2 loop
1534     } // end of face 3
1535 caltinay 3722 if (m_faceOffset[4] > -1) {
1536     #pragma omp for nowait
1537 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1538     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1539     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1547     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1548 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1549 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1550     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1551     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1552     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1553 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1554     o[INDEX3(i,1,0,numComp,3)] = V2;
1555 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1556 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1557     o[INDEX3(i,1,1,numComp,3)] = V3;
1558 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1559 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1560     o[INDEX3(i,1,2,numComp,3)] = V2;
1561 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1562 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1563     o[INDEX3(i,1,3,numComp,3)] = V3;
1564 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1565 caltinay 3764 } // end of component loop i
1566     } // end of k0 loop
1567     } // end of k1 loop
1568     } // end of face 4
1569 caltinay 3722 if (m_faceOffset[5] > -1) {
1570     #pragma omp for nowait
1571 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1572     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1573     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1580     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1581     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1582 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1583 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1584     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1585     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1586     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1587 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1588     o[INDEX3(i,1,0,numComp,3)] = V2;
1589 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1590 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1591     o[INDEX3(i,1,1,numComp,3)] = V3;
1592 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1593 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1594     o[INDEX3(i,1,2,numComp,3)] = V2;
1595 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) /