/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4860 - (hide annotations)
Thu Apr 10 04:08:42 2014 UTC (4 years, 11 months ago) by sshaw
Original Path: trunk/ripley/src/Brick.cpp
File size: 165995 byte(s)
wrapping resize exceptions for overly large domains (previous message to user was: 'RuntimeError: vector::_M_fill_insert')
1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 sshaw 4712 #include <ripley/LameAssembler3D.h>
23     #include <ripley/domainhelpers.h>
24 caltinay 4477 #include <boost/scoped_array.hpp>
25    
26 caltinay 4013 #ifdef USE_NETCDF
27     #include <netcdfcpp.h>
28     #endif
29    
30 caltinay 3691 #if USE_SILO
31     #include <silo.h>
32     #ifdef ESYS_MPI
33     #include <pmpio.h>
34     #endif
35     #endif
36    
37     #include <iomanip>
38    
39 jfenwick 4650 #include "esysUtils/EsysRandom.h"
40     #include "blocktools.h"
41    
42    
43 caltinay 3691 using namespace std;
44 caltinay 4334 using esysUtils::FileWriter;
45 caltinay 3691
46     namespace ripley {
47    
48 sshaw 4712 int indexOfMax(int a, int b, int c) {
49     if (a > b) {
50     if (c > a) {
51     return 2;
52     }
53     return 0;
54     } else if (b > c) {
55     return 1;
56     }
57     return 2;
58     }
59    
60 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
62     const std::vector<double>& points, const std::vector<int>& tags,
63 jfenwick 4638 const simap_t& tagnamestonums) :
64 caltinay 4334 RipleyDomain(3)
65 caltinay 3691 {
66 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
67     throw RipleyException("Number of elements in each spatial dimension "
68     "must be positive");
69    
70 caltinay 3943 // ignore subdivision parameters for serial run
71     if (m_mpiInfo->size == 1) {
72     d0=1;
73     d1=1;
74     d2=1;
75     }
76     bool warn=false;
77 sshaw 4712
78     std::vector<int> factors;
79     int ranks = m_mpiInfo->size;
80     int epr[3] = {n0,n1,n2};
81     int d[3] = {d0,d1,d2};
82     if (d0<=0 || d1<=0 || d2<=0) {
83     for (int i = 0; i < 3; i++) {
84     if (d[i] < 1) {
85     d[i] = 1;
86     continue;
87 caltinay 3943 }
88 sshaw 4712 epr[i] = -1; // can no longer be max
89     if (ranks % d[i] != 0) {
90     throw RipleyException("Invalid number of spatial subdivisions");
91 caltinay 3943 }
92 sshaw 4712 //remove
93     ranks /= d[i];
94 caltinay 3943 }
95 sshaw 4712 factorise(factors, ranks);
96     if (factors.size() != 0) {
97     warn = true;
98 caltinay 3943 }
99 sshaw 4712 }
100     while (factors.size() > 0) {
101     int i = indexOfMax(epr[0],epr[1],epr[2]);
102     int f = factors.back();
103     factors.pop_back();
104     d[i] *= f;
105     epr[i] /= f;
106 caltinay 3943 }
107 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
108 caltinay 3943
109 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
110     // among number of ranks
111 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
112 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
113 sshaw 4712 }
114 caltinay 3943 if (warn) {
115     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
116     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
117     }
118 caltinay 3691
119 caltinay 4334 double l0 = x1-x0;
120     double l1 = y1-y0;
121     double l2 = z1-z0;
122     m_dx[0] = l0/n0;
123     m_dx[1] = l1/n1;
124     m_dx[2] = l2/n2;
125    
126     if ((n0+1)%d0 > 0) {
127 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
128 caltinay 4334 l0=m_dx[0]*n0;
129 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
130 caltinay 4334 << n0 << ", l0=" << l0 << endl;
131 caltinay 3943 }
132 caltinay 4334 if ((n1+1)%d1 > 0) {
133 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
134 caltinay 4334 l1=m_dx[1]*n1;
135 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
136 caltinay 4334 << n1 << ", l1=" << l1 << endl;
137 caltinay 3943 }
138 caltinay 4334 if ((n2+1)%d2 > 0) {
139 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
140 caltinay 4334 l2=m_dx[2]*n2;
141 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
142 caltinay 4334 << n2 << ", l2=" << l2 << endl;
143 caltinay 3943 }
144    
145 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
146 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
147    
148 caltinay 4334 m_gNE[0] = n0;
149     m_gNE[1] = n1;
150     m_gNE[2] = n2;
151     m_origin[0] = x0;
152     m_origin[1] = y0;
153     m_origin[2] = z0;
154     m_length[0] = l0;
155     m_length[1] = l1;
156     m_length[2] = l2;
157     m_NX[0] = d0;
158     m_NX[1] = d1;
159     m_NX[2] = d2;
160    
161 caltinay 3753 // local number of elements (including overlap)
162 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
163     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
164     m_NE[0]++;
165     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
166     m_ownNE[0]--;
167 caltinay 3764
168 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
169     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
170     m_NE[1]++;
171     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
172     m_ownNE[1]--;
173 caltinay 3764
174 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
175     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
176     m_NE[2]++;
177     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
178     m_ownNE[2]--;
179 caltinay 3753
180     // local number of nodes
181 caltinay 4334 m_NN[0] = m_NE[0]+1;
182     m_NN[1] = m_NE[1]+1;
183     m_NN[2] = m_NE[2]+1;
184 caltinay 3753
185 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
186 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
187     if (m_offset[0] > 0)
188     m_offset[0]--;
189     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
190     if (m_offset[1] > 0)
191     m_offset[1]--;
192     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
193     if (m_offset[2] > 0)
194     m_offset[2]--;
195 caltinay 3753
196 caltinay 3691 populateSampleIds();
197 caltinay 3756 createPattern();
198 sshaw 4622
199     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
200 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
201     i != tagnamestonums.end(); i++) {
202     setTagMap(i->first, i->second);
203     }
204 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
205 caltinay 3691 }
206    
207    
208     Brick::~Brick()
209     {
210 sshaw 4622 delete assembler;
211 caltinay 3691 }
212    
213     string Brick::getDescription() const
214     {
215     return "ripley::Brick";
216     }
217    
218     bool Brick::operator==(const AbstractDomain& other) const
219     {
220 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
221     if (o) {
222     return (RipleyDomain::operator==(other) &&
223 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
224     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
225     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
226     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
227 caltinay 3744 }
228 caltinay 3691
229     return false;
230     }
231    
232 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
233 caltinay 4618 const ReaderParameters& params) const
234 caltinay 4013 {
235     #ifdef USE_NETCDF
236     // check destination function space
237     int myN0, myN1, myN2;
238     if (out.getFunctionSpace().getTypeCode() == Nodes) {
239 caltinay 4334 myN0 = m_NN[0];
240     myN1 = m_NN[1];
241     myN2 = m_NN[2];
242 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
243     out.getFunctionSpace().getTypeCode() == ReducedElements) {
244 caltinay 4334 myN0 = m_NE[0];
245     myN1 = m_NE[1];
246     myN2 = m_NE[2];
247 caltinay 4013 } else
248     throw RipleyException("readNcGrid(): invalid function space for output data object");
249    
250 caltinay 4615 if (params.first.size() != 3)
251 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
252    
253 caltinay 4615 if (params.numValues.size() != 3)
254 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
255    
256 caltinay 4615 if (params.multiplier.size() != 3)
257 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
258 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
259     if (params.multiplier[i]<1)
260 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
261    
262 caltinay 4013 // check file existence and size
263     NcFile f(filename.c_str(), NcFile::ReadOnly);
264     if (!f.is_valid())
265     throw RipleyException("readNcGrid(): cannot open file");
266    
267     NcVar* var = f.get_var(varname.c_str());
268     if (!var)
269     throw RipleyException("readNcGrid(): invalid variable name");
270    
271     // TODO: rank>0 data support
272     const int numComp = out.getDataPointSize();
273     if (numComp > 1)
274     throw RipleyException("readNcGrid(): only scalar data supported");
275    
276     const int dims = var->num_dims();
277 caltinay 4477 boost::scoped_array<long> edges(var->edges());
278 caltinay 4013
279     // is this a slice of the data object (dims!=3)?
280     // note the expected ordering of edges (as in numpy: z,y,x)
281 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
282     params.numValues[1] > edges[1] ||
283     params.numValues[0] > edges[2]))
284     || (dims==2 && params.numValues[2]>1)
285     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
286 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
287     }
288    
289     // check if this rank contributes anything
290 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
291     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
292     params.first[1] >= m_offset[1]+myN1 ||
293     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
294     params.first[2] >= m_offset[2]+myN2 ||
295     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
296 caltinay 4013 return;
297     }
298    
299     // now determine how much this rank has to write
300    
301     // first coordinates in data object to write to
302 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
303     const int first1 = max(0, params.first[1]-m_offset[1]);
304     const int first2 = max(0, params.first[2]-m_offset[2]);
305 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
306     int idx0 = max(0, m_offset[0]-params.first[0]);
307     int idx1 = max(0, m_offset[1]-params.first[1]);
308     int idx2 = max(0, m_offset[2]-params.first[2]);
309 caltinay 4277 // number of values to read
310 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
311     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
312     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
313 caltinay 4013
314 caltinay 4618 // make sure we read the right block if going backwards through file
315     if (params.reverse[0])
316     idx0 = edges[dims-1]-num0-idx0;
317     if (dims>1 && params.reverse[1])
318     idx1 = edges[dims-2]-num1-idx1;
319     if (dims>2 && params.reverse[2])
320     idx2 = edges[dims-3]-num2-idx2;
321    
322    
323 caltinay 4013 vector<double> values(num0*num1*num2);
324     if (dims==3) {
325     var->set_cur(idx2, idx1, idx0);
326     var->get(&values[0], num2, num1, num0);
327     } else if (dims==2) {
328     var->set_cur(idx1, idx0);
329     var->get(&values[0], num1, num0);
330     } else {
331     var->set_cur(idx0);
332     var->get(&values[0], num0);
333     }
334    
335     const int dpp = out.getNumDataPointsPerSample();
336     out.requireWrite();
337    
338 caltinay 4618 // helpers for reversing
339     const int x0 = (params.reverse[0] ? num0-1 : 0);
340     const int x_mult = (params.reverse[0] ? -1 : 1);
341     const int y0 = (params.reverse[1] ? num1-1 : 0);
342     const int y_mult = (params.reverse[1] ? -1 : 1);
343     const int z0 = (params.reverse[2] ? num2-1 : 0);
344     const int z_mult = (params.reverse[2] ? -1 : 1);
345    
346 caltinay 4013 for (index_t z=0; z<num2; z++) {
347     for (index_t y=0; y<num1; y++) {
348     #pragma omp parallel for
349     for (index_t x=0; x<num0; x++) {
350 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
351     +(first1+y*params.multiplier[1])*myN0
352     +(first2+z*params.multiplier[2])*myN0*myN1;
353 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
354     +(y0+y_mult*y)*num0
355     +(x0+x_mult*x);
356 caltinay 4174 if (!isnan(values[srcIndex])) {
357 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
358     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
359     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
360 caltinay 4277 const int dataIndex = baseIndex+m0
361     +m1*myN0
362     +m2*myN0*myN1;
363     double* dest = out.getSampleDataRW(dataIndex);
364     for (index_t q=0; q<dpp; q++) {
365     *dest++ = values[srcIndex];
366     }
367     }
368     }
369 caltinay 4174 }
370 caltinay 4013 }
371     }
372     }
373     }
374     #else
375     throw RipleyException("readNcGrid(): not compiled with netCDF support");
376     #endif
377     }
378    
379 sshaw 4738 #ifdef USE_BOOSTIO
380     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
381     const ReaderParameters& params) const
382     {
383     // the mapping is not universally correct but should work on our
384     // supported platforms
385     switch (params.dataType) {
386     case DATATYPE_INT32:
387     readBinaryGridZippedImpl<int>(out, filename, params);
388     break;
389     case DATATYPE_FLOAT32:
390     readBinaryGridZippedImpl<float>(out, filename, params);
391     break;
392     case DATATYPE_FLOAT64:
393     readBinaryGridZippedImpl<double>(out, filename, params);
394     break;
395     default:
396     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
397     }
398     }
399     #endif
400    
401 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
402 caltinay 4618 const ReaderParameters& params) const
403 caltinay 4334 {
404 caltinay 4495 // the mapping is not universally correct but should work on our
405     // supported platforms
406 caltinay 4615 switch (params.dataType) {
407 caltinay 4495 case DATATYPE_INT32:
408 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
409 caltinay 4495 break;
410     case DATATYPE_FLOAT32:
411 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
412 caltinay 4495 break;
413     case DATATYPE_FLOAT64:
414 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
415 caltinay 4495 break;
416     default:
417     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
418     }
419     }
420    
421     template<typename ValueType>
422     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
423 caltinay 4618 const ReaderParameters& params) const
424 caltinay 4495 {
425 caltinay 4334 // check destination function space
426     int myN0, myN1, myN2;
427     if (out.getFunctionSpace().getTypeCode() == Nodes) {
428     myN0 = m_NN[0];
429     myN1 = m_NN[1];
430     myN2 = m_NN[2];
431     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
432     out.getFunctionSpace().getTypeCode() == ReducedElements) {
433     myN0 = m_NE[0];
434     myN1 = m_NE[1];
435     myN2 = m_NE[2];
436     } else
437     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
438    
439 caltinay 4615 if (params.first.size() != 3)
440 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
441    
442 caltinay 4615 if (params.numValues.size() != 3)
443 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
444    
445 caltinay 4615 if (params.multiplier.size() != 3)
446 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
447 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
448     if (params.multiplier[i]<1)
449 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
450    
451     // check file existence and size
452     ifstream f(filename.c_str(), ifstream::binary);
453     if (f.fail()) {
454     throw RipleyException("readBinaryGrid(): cannot open file");
455     }
456     f.seekg(0, ios::end);
457     const int numComp = out.getDataPointSize();
458     const int filesize = f.tellg();
459 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
460 caltinay 4334 if (filesize < reqsize) {
461     f.close();
462     throw RipleyException("readBinaryGrid(): not enough data in file");
463     }
464    
465     // check if this rank contributes anything
466 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
467     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
468     params.first[1] >= m_offset[1]+myN1 ||
469     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
470     params.first[2] >= m_offset[2]+myN2 ||
471     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
472 caltinay 4334 f.close();
473     return;
474     }
475    
476     // now determine how much this rank has to write
477    
478     // first coordinates in data object to write to
479 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
480     const int first1 = max(0, params.first[1]-m_offset[1]);
481     const int first2 = max(0, params.first[2]-m_offset[2]);
482 caltinay 4334 // indices to first value in file
483 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
484     const int idx1 = max(0, m_offset[1]-params.first[1]);
485     const int idx2 = max(0, m_offset[2]-params.first[2]);
486 caltinay 4334 // number of values to read
487 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
488     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
489     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
490 caltinay 4334
491     out.requireWrite();
492 caltinay 4495 vector<ValueType> values(num0*numComp);
493 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
494    
495 caltinay 4529 for (int z=0; z<num2; z++) {
496     for (int y=0; y<num1; y++) {
497 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
498     +(idx2+z)*params.numValues[0]*params.numValues[1]);
499 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
500     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
501 caltinay 4334
502 caltinay 4529 for (int x=0; x<num0; x++) {
503 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
504     +(first1+y*params.multiplier[1])*myN0
505     +(first2+z*params.multiplier[2])*myN0*myN1;
506     for (int m2=0; m2<params.multiplier[2]; m2++) {
507     for (int m1=0; m1<params.multiplier[1]; m1++) {
508     for (int m0=0; m0<params.multiplier[0]; m0++) {
509 caltinay 4334 const int dataIndex = baseIndex+m0
510     +m1*myN0
511     +m2*myN0*myN1;
512     double* dest = out.getSampleDataRW(dataIndex);
513 caltinay 4529 for (int c=0; c<numComp; c++) {
514     ValueType val = values[x*numComp+c];
515    
516 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
517 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
518     // this will alter val!!
519     byte_swap32(cval);
520     }
521     if (!std::isnan(val)) {
522     for (int q=0; q<dpp; q++) {
523     *dest++ = static_cast<double>(val);
524 caltinay 4334 }
525     }
526     }
527     }
528     }
529     }
530     }
531     }
532     }
533    
534     f.close();
535     }
536    
537 sshaw 4738 #ifdef USE_BOOSTIO
538     template<typename ValueType>
539     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
540     const ReaderParameters& params) const
541     {
542     // check destination function space
543     int myN0, myN1, myN2;
544     if (out.getFunctionSpace().getTypeCode() == Nodes) {
545     myN0 = m_NN[0];
546     myN1 = m_NN[1];
547     myN2 = m_NN[2];
548     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
549     out.getFunctionSpace().getTypeCode() == ReducedElements) {
550     myN0 = m_NE[0];
551     myN1 = m_NE[1];
552     myN2 = m_NE[2];
553     } else
554     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
555    
556     if (params.first.size() != 3)
557     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
558    
559     if (params.numValues.size() != 3)
560     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
561    
562     if (params.multiplier.size() != 3)
563     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
564     for (size_t i=0; i<params.multiplier.size(); i++)
565     if (params.multiplier[i]<1)
566     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
567    
568     // check file existence and size
569     ifstream f(filename.c_str(), ifstream::binary);
570     if (f.fail()) {
571     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
572     }
573     f.seekg(0, ios::end);
574     const int numComp = out.getDataPointSize();
575     int filesize = f.tellg();
576     f.seekg(0, ios::beg);
577     std::vector<char> compressed(filesize);
578     f.read((char*)&compressed[0], filesize);
579     f.close();
580     std::vector<char> decompressed = unzip(compressed);
581     filesize = decompressed.size();
582     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
583     if (filesize < reqsize) {
584     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
585     }
586    
587     // check if this rank contributes anything
588     if (params.first[0] >= m_offset[0]+myN0 ||
589     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
590     params.first[1] >= m_offset[1]+myN1 ||
591     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
592     params.first[2] >= m_offset[2]+myN2 ||
593     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
594     return;
595     }
596    
597     // now determine how much this rank has to write
598    
599     // first coordinates in data object to write to
600     const int first0 = max(0, params.first[0]-m_offset[0]);
601     const int first1 = max(0, params.first[1]-m_offset[1]);
602     const int first2 = max(0, params.first[2]-m_offset[2]);
603     // indices to first value in file
604     const int idx0 = max(0, m_offset[0]-params.first[0]);
605     const int idx1 = max(0, m_offset[1]-params.first[1]);
606     const int idx2 = max(0, m_offset[2]-params.first[2]);
607     // number of values to read
608     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
609     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
610     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
611    
612     out.requireWrite();
613     vector<ValueType> values(num0*numComp);
614     const int dpp = out.getNumDataPointsPerSample();
615    
616     for (int z=0; z<num2; z++) {
617     for (int y=0; y<num1; y++) {
618     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
619     +(idx2+z)*params.numValues[0]*params.numValues[1]);
620     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
621    
622     for (int x=0; x<num0; x++) {
623     const int baseIndex = first0+x*params.multiplier[0]
624     +(first1+y*params.multiplier[1])*myN0
625     +(first2+z*params.multiplier[2])*myN0*myN1;
626     for (int m2=0; m2<params.multiplier[2]; m2++) {
627     for (int m1=0; m1<params.multiplier[1]; m1++) {
628     for (int m0=0; m0<params.multiplier[0]; m0++) {
629     const int dataIndex = baseIndex+m0
630     +m1*myN0
631     +m2*myN0*myN1;
632     double* dest = out.getSampleDataRW(dataIndex);
633     for (int c=0; c<numComp; c++) {
634     ValueType val = values[x*numComp+c];
635    
636     if (params.byteOrder != BYTEORDER_NATIVE) {
637     char* cval = reinterpret_cast<char*>(&val);
638     // this will alter val!!
639     byte_swap32(cval);
640     }
641     if (!std::isnan(val)) {
642     for (int q=0; q<dpp; q++) {
643     *dest++ = static_cast<double>(val);
644     }
645     }
646     }
647     }
648     }
649     }
650     }
651     }
652     }
653     }
654     #endif
655    
656 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
657     int byteOrder, int dataType) const
658 caltinay 4334 {
659 caltinay 4357 // the mapping is not universally correct but should work on our
660     // supported platforms
661     switch (dataType) {
662     case DATATYPE_INT32:
663     writeBinaryGridImpl<int>(in, filename, byteOrder);
664     break;
665     case DATATYPE_FLOAT32:
666     writeBinaryGridImpl<float>(in, filename, byteOrder);
667     break;
668     case DATATYPE_FLOAT64:
669     writeBinaryGridImpl<double>(in, filename, byteOrder);
670     break;
671     default:
672     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
673     }
674     }
675    
676     template<typename ValueType>
677     void Brick::writeBinaryGridImpl(const escript::Data& in,
678     const string& filename, int byteOrder) const
679     {
680 caltinay 4334 // check function space and determine number of points
681     int myN0, myN1, myN2;
682     int totalN0, totalN1, totalN2;
683     if (in.getFunctionSpace().getTypeCode() == Nodes) {
684     myN0 = m_NN[0];
685     myN1 = m_NN[1];
686     myN2 = m_NN[2];
687     totalN0 = m_gNE[0]+1;
688     totalN1 = m_gNE[1]+1;
689     totalN2 = m_gNE[2]+1;
690     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
691     in.getFunctionSpace().getTypeCode() == ReducedElements) {
692     myN0 = m_NE[0];
693     myN1 = m_NE[1];
694     myN2 = m_NE[2];
695     totalN0 = m_gNE[0];
696     totalN1 = m_gNE[1];
697     totalN2 = m_gNE[2];
698     } else
699     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
700    
701     const int numComp = in.getDataPointSize();
702     const int dpp = in.getNumDataPointsPerSample();
703 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
704 caltinay 4334
705     if (numComp > 1 || dpp > 1)
706     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
707    
708     // from here on we know that each sample consists of one value
709 caltinay 4482 FileWriter fw;
710     fw.openFile(filename, fileSize);
711 caltinay 4334 MPIBarrier();
712    
713     for (index_t z=0; z<myN2; z++) {
714     for (index_t y=0; y<myN1; y++) {
715     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
716 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
717 caltinay 4334 ostringstream oss;
718    
719     for (index_t x=0; x<myN0; x++) {
720 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
721 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
722     if (byteOrder == BYTEORDER_NATIVE) {
723 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
724     } else {
725     char* value = reinterpret_cast<char*>(&fvalue);
726 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
727 caltinay 4334 }
728     }
729 caltinay 4482 fw.writeAt(oss, fileofs);
730 caltinay 4334 }
731     }
732 caltinay 4482 fw.close();
733 caltinay 4334 }
734    
735 caltinay 3691 void Brick::dump(const string& fileName) const
736     {
737     #if USE_SILO
738     string fn(fileName);
739     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
740     fn+=".silo";
741     }
742    
743     int driver=DB_HDF5;
744     string siloPath;
745     DBfile* dbfile = NULL;
746    
747     #ifdef ESYS_MPI
748     PMPIO_baton_t* baton = NULL;
749 gross 3793 const int NUM_SILO_FILES = 1;
750     const char* blockDirFmt = "/block%04d";
751 caltinay 3691 #endif
752    
753     if (m_mpiInfo->size > 1) {
754     #ifdef ESYS_MPI
755     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
756     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
757     PMPIO_DefaultClose, (void*)&driver);
758     // try the fallback driver in case of error
759     if (!baton && driver != DB_PDB) {
760     driver = DB_PDB;
761     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
762     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
763     PMPIO_DefaultClose, (void*)&driver);
764     }
765     if (baton) {
766     char str[64];
767     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
768     siloPath = str;
769     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
770     }
771     #endif
772     } else {
773     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
774     getDescription().c_str(), driver);
775     // try the fallback driver in case of error
776     if (!dbfile && driver != DB_PDB) {
777     driver = DB_PDB;
778     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
779     getDescription().c_str(), driver);
780     }
781     }
782    
783     if (!dbfile)
784     throw RipleyException("dump: Could not create Silo file");
785    
786     /*
787     if (driver==DB_HDF5) {
788     // gzip level 1 already provides good compression with minimal
789     // performance penalty. Some tests showed that gzip levels >3 performed
790     // rather badly on escript data both in terms of time and space
791     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
792     }
793     */
794    
795 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
796     boost::scoped_ptr<double> y(new double[m_NN[1]]);
797     boost::scoped_ptr<double> z(new double[m_NN[2]]);
798 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
799     #pragma omp parallel
800     {
801     #pragma omp for
802 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
803     coords[0][i0]=getLocalCoordinate(i0, 0);
804 caltinay 3691 }
805     #pragma omp for
806 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
807     coords[1][i1]=getLocalCoordinate(i1, 1);
808 caltinay 3691 }
809     #pragma omp for
810 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
811     coords[2][i2]=getLocalCoordinate(i2, 2);
812 caltinay 3691 }
813     }
814 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
815    
816     // write mesh
817     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
818 caltinay 3691 DB_COLLINEAR, NULL);
819    
820 caltinay 4334 // write node ids
821     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
822 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
823 caltinay 3691
824 caltinay 3698 // write element ids
825 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
826 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
827 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
828 caltinay 3698
829     // rank 0 writes multimesh and multivar
830 caltinay 3691 if (m_mpiInfo->rank == 0) {
831     vector<string> tempstrings;
832     vector<char*> names;
833     for (dim_t i=0; i<m_mpiInfo->size; i++) {
834     stringstream path;
835     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
836     tempstrings.push_back(path.str());
837     names.push_back((char*)tempstrings.back().c_str());
838     }
839     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
840     DBSetDir(dbfile, "/");
841     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
842     &types[0], NULL);
843     tempstrings.clear();
844     names.clear();
845     for (dim_t i=0; i<m_mpiInfo->size; i++) {
846     stringstream path;
847     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
848     tempstrings.push_back(path.str());
849     names.push_back((char*)tempstrings.back().c_str());
850     }
851     types.assign(m_mpiInfo->size, DB_QUADVAR);
852     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
853     &types[0], NULL);
854 caltinay 3698 tempstrings.clear();
855     names.clear();
856     for (dim_t i=0; i<m_mpiInfo->size; i++) {
857     stringstream path;
858     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
859     tempstrings.push_back(path.str());
860     names.push_back((char*)tempstrings.back().c_str());
861     }
862     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
863     &types[0], NULL);
864 caltinay 3691 }
865    
866     if (m_mpiInfo->size > 1) {
867     #ifdef ESYS_MPI
868     PMPIO_HandOffBaton(baton, dbfile);
869     PMPIO_Finish(baton);
870     #endif
871     } else {
872     DBClose(dbfile);
873     }
874    
875     #else // USE_SILO
876 caltinay 3791 throw RipleyException("dump: no Silo support");
877 caltinay 3691 #endif
878     }
879    
880     const int* Brick::borrowSampleReferenceIDs(int fsType) const
881     {
882 caltinay 3697 switch (fsType) {
883     case Nodes:
884 caltinay 3748 case ReducedNodes: //FIXME: reduced
885 caltinay 3753 return &m_nodeId[0];
886 caltinay 3757 case DegreesOfFreedom:
887     case ReducedDegreesOfFreedom: //FIXME: reduced
888 caltinay 3753 return &m_dofId[0];
889 caltinay 3697 case Elements:
890 caltinay 3733 case ReducedElements:
891 caltinay 3697 return &m_elementId[0];
892 caltinay 3757 case FaceElements:
893 caltinay 3733 case ReducedFaceElements:
894 caltinay 3697 return &m_faceId[0];
895 sshaw 4660 case Points:
896     return &m_diracPointNodeIDs[0];
897 caltinay 3697 default:
898     break;
899     }
900 caltinay 3691
901 caltinay 3697 stringstream msg;
902 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
903 caltinay 3697 throw RipleyException(msg.str());
904 caltinay 3691 }
905    
906 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
907 caltinay 3691 {
908 caltinay 3759 if (getMPISize()==1)
909     return true;
910    
911 caltinay 3757 switch (fsType) {
912     case Nodes:
913     case ReducedNodes: //FIXME: reduced
914     return (m_dofMap[id] < getNumDOF());
915     case DegreesOfFreedom:
916     case ReducedDegreesOfFreedom:
917     return true;
918     case Elements:
919     case ReducedElements:
920     {
921     // check ownership of element's _last_ node
922 caltinay 4334 const index_t x=id%m_NE[0] + 1;
923     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
924     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
925     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
926 caltinay 3757 }
927     case FaceElements:
928     case ReducedFaceElements:
929 caltinay 3759 {
930     // check ownership of face element's last node
931     dim_t n=0;
932 caltinay 4334 for (size_t i=0; i<6; i++) {
933     n+=m_faceCount[i];
934 caltinay 3759 if (id<n) {
935 caltinay 4334 const index_t j=id-n+m_faceCount[i];
936 caltinay 3759 if (i>=4) { // front or back
937 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
938     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
939 caltinay 3759 } else if (i>=2) { // bottom or top
940 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
941     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
942 caltinay 3759 } else { // left or right
943 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
944     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
945 caltinay 3759 }
946     }
947     }
948     return false;
949     }
950 caltinay 3757 default:
951     break;
952 caltinay 3753 }
953 caltinay 3757
954     stringstream msg;
955 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
956 caltinay 3757 throw RipleyException(msg.str());
957 caltinay 3691 }
958    
959 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
960 caltinay 3703 {
961 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
962     out.requireWrite();
963     #pragma omp parallel
964     {
965     if (m_faceOffset[0] > -1) {
966     #pragma omp for nowait
967 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
968     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
969     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
970 caltinay 3764 // set vector at four quadrature points
971     *o++ = -1.; *o++ = 0.; *o++ = 0.;
972     *o++ = -1.; *o++ = 0.; *o++ = 0.;
973     *o++ = -1.; *o++ = 0.; *o++ = 0.;
974     *o++ = -1.; *o++ = 0.; *o = 0.;
975     }
976     }
977     }
978    
979     if (m_faceOffset[1] > -1) {
980     #pragma omp for nowait
981 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
982     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
983     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
984 caltinay 3764 // set vector at four quadrature points
985     *o++ = 1.; *o++ = 0.; *o++ = 0.;
986     *o++ = 1.; *o++ = 0.; *o++ = 0.;
987     *o++ = 1.; *o++ = 0.; *o++ = 0.;
988     *o++ = 1.; *o++ = 0.; *o = 0.;
989     }
990     }
991     }
992    
993     if (m_faceOffset[2] > -1) {
994     #pragma omp for nowait
995 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
996     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
997     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
998 caltinay 3764 // set vector at four quadrature points
999     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1000     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1001     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1002     *o++ = 0.; *o++ = -1.; *o = 0.;
1003     }
1004     }
1005     }
1006    
1007     if (m_faceOffset[3] > -1) {
1008     #pragma omp for nowait
1009 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1010     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1011     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1012 caltinay 3764 // set vector at four quadrature points
1013     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1014     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1015     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1016     *o++ = 0.; *o++ = 1.; *o = 0.;
1017     }
1018     }
1019     }
1020    
1021     if (m_faceOffset[4] > -1) {
1022     #pragma omp for nowait
1023 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1024     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1025     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1026 caltinay 3764 // set vector at four quadrature points
1027     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1028     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1029     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1030     *o++ = 0.; *o++ = 0.; *o = -1.;
1031     }
1032     }
1033     }
1034    
1035     if (m_faceOffset[5] > -1) {
1036     #pragma omp for nowait
1037 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1038     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1039     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1040 caltinay 3764 // set vector at four quadrature points
1041     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1042     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1043     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1044     *o++ = 0.; *o++ = 0.; *o = 1.;
1045     }
1046     }
1047     }
1048     } // end of parallel section
1049     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1050     out.requireWrite();
1051     #pragma omp parallel
1052     {
1053     if (m_faceOffset[0] > -1) {
1054     #pragma omp for nowait
1055 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1056     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1057     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1058 caltinay 3764 *o++ = -1.;
1059     *o++ = 0.;
1060     *o = 0.;
1061     }
1062     }
1063     }
1064    
1065     if (m_faceOffset[1] > -1) {
1066     #pragma omp for nowait
1067 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1068     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1069     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1070 caltinay 3764 *o++ = 1.;
1071     *o++ = 0.;
1072     *o = 0.;
1073     }
1074     }
1075     }
1076    
1077     if (m_faceOffset[2] > -1) {
1078     #pragma omp for nowait
1079 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1080     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1081     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1082 caltinay 3764 *o++ = 0.;
1083     *o++ = -1.;
1084     *o = 0.;
1085     }
1086     }
1087     }
1088    
1089     if (m_faceOffset[3] > -1) {
1090     #pragma omp for nowait
1091 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1092     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1093     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1094 caltinay 3764 *o++ = 0.;
1095     *o++ = 1.;
1096     *o = 0.;
1097     }
1098     }
1099     }
1100    
1101     if (m_faceOffset[4] > -1) {
1102     #pragma omp for nowait
1103 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1104     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1105     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1106 caltinay 3764 *o++ = 0.;
1107     *o++ = 0.;
1108     *o = -1.;
1109     }
1110     }
1111     }
1112    
1113     if (m_faceOffset[5] > -1) {
1114     #pragma omp for nowait
1115 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1116     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1117     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1118 caltinay 3764 *o++ = 0.;
1119     *o++ = 0.;
1120     *o = 1.;
1121     }
1122     }
1123     }
1124     } // end of parallel section
1125    
1126     } else {
1127     stringstream msg;
1128 caltinay 3791 msg << "setToNormal: invalid function space type "
1129     << out.getFunctionSpace().getTypeCode();
1130 caltinay 3764 throw RipleyException(msg.str());
1131     }
1132     }
1133    
1134     void Brick::setToSize(escript::Data& out) const
1135     {
1136     if (out.getFunctionSpace().getTypeCode() == Elements
1137     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1138     out.requireWrite();
1139     const dim_t numQuad=out.getNumDataPointsPerSample();
1140 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1141 caltinay 3764 #pragma omp parallel for
1142     for (index_t k = 0; k < getNumElements(); ++k) {
1143     double* o = out.getSampleDataRW(k);
1144     fill(o, o+numQuad, size);
1145     }
1146     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1147     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1148     out.requireWrite();
1149     const dim_t numQuad=out.getNumDataPointsPerSample();
1150     #pragma omp parallel
1151     {
1152     if (m_faceOffset[0] > -1) {
1153 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1154 caltinay 3764 #pragma omp for nowait
1155 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1156     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1157     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1158 caltinay 3764 fill(o, o+numQuad, size);
1159     }
1160     }
1161     }
1162    
1163     if (m_faceOffset[1] > -1) {
1164 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1165 caltinay 3764 #pragma omp for nowait
1166 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1167     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1168     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1169 caltinay 3764 fill(o, o+numQuad, size);
1170     }
1171     }
1172     }
1173    
1174     if (m_faceOffset[2] > -1) {
1175 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1176 caltinay 3764 #pragma omp for nowait
1177 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1178     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1179     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1180 caltinay 3764 fill(o, o+numQuad, size);
1181     }
1182     }
1183     }
1184    
1185     if (m_faceOffset[3] > -1) {
1186 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1187 caltinay 3764 #pragma omp for nowait
1188 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1189     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1190     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1191 caltinay 3764 fill(o, o+numQuad, size);
1192     }
1193     }
1194     }
1195    
1196     if (m_faceOffset[4] > -1) {
1197 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1198 caltinay 3764 #pragma omp for nowait
1199 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1200     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1201     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1202 caltinay 3764 fill(o, o+numQuad, size);
1203     }
1204     }
1205     }
1206    
1207     if (m_faceOffset[5] > -1) {
1208 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1209 caltinay 3764 #pragma omp for nowait
1210 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1211     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1212     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1213 caltinay 3764 fill(o, o+numQuad, size);
1214     }
1215     }
1216     }
1217     } // end of parallel section
1218    
1219     } else {
1220     stringstream msg;
1221 caltinay 3791 msg << "setToSize: invalid function space type "
1222     << out.getFunctionSpace().getTypeCode();
1223 caltinay 3764 throw RipleyException(msg.str());
1224     }
1225     }
1226    
1227     void Brick::Print_Mesh_Info(const bool full) const
1228     {
1229     RipleyDomain::Print_Mesh_Info(full);
1230     if (full) {
1231     cout << " Id Coordinates" << endl;
1232     cout.precision(15);
1233     cout.setf(ios::scientific, ios::floatfield);
1234     for (index_t i=0; i < getNumNodes(); i++) {
1235     cout << " " << setw(5) << m_nodeId[i]
1236 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1237     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1238     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1239 caltinay 3764 }
1240     }
1241     }
1242    
1243    
1244     //protected
1245     void Brick::assembleCoordinates(escript::Data& arg) const
1246     {
1247     escriptDataC x = arg.getDataC();
1248     int numDim = m_numDim;
1249     if (!isDataPointShapeEqual(&x, 1, &numDim))
1250     throw RipleyException("setToX: Invalid Data object shape");
1251     if (!numSamplesEqual(&x, 1, getNumNodes()))
1252     throw RipleyException("setToX: Illegal number of samples in Data object");
1253    
1254     arg.requireWrite();
1255     #pragma omp parallel for
1256 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1257     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1258     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1259     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1260     point[0] = getLocalCoordinate(i0, 0);
1261     point[1] = getLocalCoordinate(i1, 1);
1262     point[2] = getLocalCoordinate(i2, 2);
1263 caltinay 3764 }
1264     }
1265     }
1266     }
1267    
1268     //protected
1269 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1270 caltinay 3764 {
1271 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1272 caltinay 3731 const double C0 = .044658198738520451079;
1273     const double C1 = .16666666666666666667;
1274     const double C2 = .21132486540518711775;
1275     const double C3 = .25;
1276     const double C4 = .5;
1277     const double C5 = .62200846792814621559;
1278     const double C6 = .78867513459481288225;
1279    
1280 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1281 caltinay 3760 out.requireWrite();
1282 caltinay 3913 #pragma omp parallel
1283     {
1284     vector<double> f_000(numComp);
1285     vector<double> f_001(numComp);
1286     vector<double> f_010(numComp);
1287     vector<double> f_011(numComp);
1288     vector<double> f_100(numComp);
1289     vector<double> f_101(numComp);
1290     vector<double> f_110(numComp);
1291     vector<double> f_111(numComp);
1292     #pragma omp for
1293 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1294     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1295     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1296     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1302     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1303     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1304     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1305 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1306 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1308     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1309     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1310     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1311     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1312     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1313     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1314     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1315     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1316     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1317     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1318 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1319     o[INDEX3(i,1,0,numComp,3)] = V4;
1320     o[INDEX3(i,2,0,numComp,3)] = V8;
1321     o[INDEX3(i,0,1,numComp,3)] = V0;
1322     o[INDEX3(i,1,1,numComp,3)] = V5;
1323     o[INDEX3(i,2,1,numComp,3)] = V9;
1324     o[INDEX3(i,0,2,numComp,3)] = V1;
1325     o[INDEX3(i,1,2,numComp,3)] = V4;
1326     o[INDEX3(i,2,2,numComp,3)] = V10;
1327     o[INDEX3(i,0,3,numComp,3)] = V1;
1328     o[INDEX3(i,1,3,numComp,3)] = V5;
1329     o[INDEX3(i,2,3,numComp,3)] = V11;
1330     o[INDEX3(i,0,4,numComp,3)] = V2;
1331     o[INDEX3(i,1,4,numComp,3)] = V6;
1332     o[INDEX3(i,2,4,numComp,3)] = V8;
1333     o[INDEX3(i,0,5,numComp,3)] = V2;
1334     o[INDEX3(i,1,5,numComp,3)] = V7;
1335     o[INDEX3(i,2,5,numComp,3)] = V9;
1336     o[INDEX3(i,0,6,numComp,3)] = V3;
1337     o[INDEX3(i,1,6,numComp,3)] = V6;
1338     o[INDEX3(i,2,6,numComp,3)] = V10;
1339     o[INDEX3(i,0,7,numComp,3)] = V3;
1340     o[INDEX3(i,1,7,numComp,3)] = V7;
1341     o[INDEX3(i,2,7,numComp,3)] = V11;
1342     } // end of component loop i
1343     } // end of k0 loop
1344     } // end of k1 loop
1345     } // end of k2 loop
1346     } // end of parallel section
1347 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1348 caltinay 3760 out.requireWrite();
1349 caltinay 3913 #pragma omp parallel
1350     {
1351     vector<double> f_000(numComp);
1352     vector<double> f_001(numComp);
1353     vector<double> f_010(numComp);
1354     vector<double> f_011(numComp);
1355     vector<double> f_100(numComp);
1356     vector<double> f_101(numComp);
1357     vector<double> f_110(numComp);
1358     vector<double> f_111(numComp);
1359     #pragma omp for
1360 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1361     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1362     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1363     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1369     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1370     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1372 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1373 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1374     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1375     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1376 caltinay 3913 } // end of component loop i
1377     } // end of k0 loop
1378     } // end of k1 loop
1379     } // end of k2 loop
1380     } // end of parallel section
1381 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1382 caltinay 3760 out.requireWrite();
1383 caltinay 3722 #pragma omp parallel
1384     {
1385 caltinay 3913 vector<double> f_000(numComp);
1386     vector<double> f_001(numComp);
1387     vector<double> f_010(numComp);
1388     vector<double> f_011(numComp);
1389     vector<double> f_100(numComp);
1390     vector<double> f_101(numComp);
1391     vector<double> f_110(numComp);
1392     vector<double> f_111(numComp);
1393 caltinay 3722 if (m_faceOffset[0] > -1) {
1394     #pragma omp for nowait
1395 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1396     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1397     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1403     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1404     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1405     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1406 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1407 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1408     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1409     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1410     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1411     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1412 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1413     o[INDEX3(i,2,0,numComp,3)] = V2;
1414 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1415 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1416     o[INDEX3(i,2,1,numComp,3)] = V3;
1417 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1418 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1419     o[INDEX3(i,2,2,numComp,3)] = V2;
1420 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1421 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1422     o[INDEX3(i,2,3,numComp,3)] = V3;
1423 caltinay 3764 } // end of component loop i
1424     } // end of k1 loop
1425     } // end of k2 loop
1426     } // end of face 0
1427 caltinay 3722 if (m_faceOffset[1] > -1) {
1428     #pragma omp for nowait
1429 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1430     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1431     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1438     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1440 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1441 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1442     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1443     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1444     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1445     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1446 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1447     o[INDEX3(i,2,0,numComp,3)] = V2;
1448 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1449 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1450     o[INDEX3(i,2,1,numComp,3)] = V3;
1451 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1452 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1453     o[INDEX3(i,2,2,numComp,3)] = V2;
1454 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1455 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1456     o[INDEX3(i,2,3,numComp,3)] = V3;
1457 caltinay 3764 } // end of component loop i
1458     } // end of k1 loop
1459     } // end of k2 loop
1460     } // end of face 1
1461 caltinay 3722 if (m_faceOffset[2] > -1) {
1462     #pragma omp for nowait
1463 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1464     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1465     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1474 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1475 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1476     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1477     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1478 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1479 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1480 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1481     o[INDEX3(i,0,1,numComp,3)] = V0;
1482 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1483 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1484     o[INDEX3(i,0,2,numComp,3)] = V0;
1485 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1486 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1487     o[INDEX3(i,0,3,numComp,3)] = V0;
1488 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1489 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1490 caltinay 3764 } // end of component loop i
1491     } // end of k0 loop
1492     } // end of k2 loop
1493     } // end of face 2
1494 caltinay 3722 if (m_faceOffset[3] > -1) {
1495     #pragma omp for nowait
1496 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1497     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1498     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1507 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1508 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1509     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1510     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1511     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1512 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1513 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1514 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1515     o[INDEX3(i,0,1,numComp,3)] = V0;
1516 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1517 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1518     o[INDEX3(i,0,2,numComp,3)] = V1;
1519 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1520 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1521     o[INDEX3(i,0,3,numComp,3)] = V1;
1522 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1523 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1524 caltinay 3764 } // end of component loop i
1525     } // end of k0 loop
1526     } // end of k2 loop
1527     } // end of face 3
1528 caltinay 3722 if (m_faceOffset[4] > -1) {
1529     #pragma omp for nowait
1530 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1531     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1532     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1538     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1541 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1542 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1543     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1544     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1545     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1546 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1547     o[INDEX3(i,1,0,numComp,3)] = V2;
1548 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1549 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1550     o[INDEX3(i,1,1,numComp,3)] = V3;
1551 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1552 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1553     o[INDEX3(i,1,2,numComp,3)] = V2;
1554 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1555 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1556     o[INDEX3(i,1,3,numComp,3)] = V3;
1557 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1558 caltinay 3764 } // end of component loop i
1559     } // end of k0 loop
1560     } // end of k1 loop
1561     } // end of face 4
1562 caltinay 3722 if (m_faceOffset[5] > -1) {
1563     #pragma omp for nowait
1564 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1565     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1566     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1575 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1576 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1577     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1578     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1579     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1580 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1581     o[INDEX3(i,1,0,numComp,3)] = V2;
1582 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1583 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1584     o[INDEX3(i,1,1,numComp,3)] = V3;
1585 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1586 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1587     o[INDEX3(i,1,2,numComp,3)] = V2;
1588 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1589 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1590     o[INDEX3(i,1,3,numComp,3)] = V3;
1591 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5