/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4817 - (hide annotations)
Fri Mar 28 08:04:09 2014 UTC (4 years, 11 months ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 165607 byte(s)
Coupler/Connector shared ptrs.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 sshaw 4712 #include <ripley/LameAssembler3D.h>
23     #include <ripley/domainhelpers.h>
24 caltinay 4477 #include <boost/scoped_array.hpp>
25    
26 caltinay 4013 #ifdef USE_NETCDF
27     #include <netcdfcpp.h>
28     #endif
29    
30 caltinay 3691 #if USE_SILO
31     #include <silo.h>
32     #ifdef ESYS_MPI
33     #include <pmpio.h>
34     #endif
35     #endif
36    
37     #include <iomanip>
38    
39 jfenwick 4650 #include "esysUtils/EsysRandom.h"
40     #include "blocktools.h"
41    
42    
43 caltinay 3691 using namespace std;
44 caltinay 4334 using esysUtils::FileWriter;
45 caltinay 3691
46     namespace ripley {
47    
48 sshaw 4712 int indexOfMax(int a, int b, int c) {
49     if (a > b) {
50     if (c > a) {
51     return 2;
52     }
53     return 0;
54     } else if (b > c) {
55     return 1;
56     }
57     return 2;
58     }
59    
60 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
62     const std::vector<double>& points, const std::vector<int>& tags,
63 jfenwick 4638 const simap_t& tagnamestonums) :
64 caltinay 4334 RipleyDomain(3)
65 caltinay 3691 {
66 caltinay 3943 // ignore subdivision parameters for serial run
67     if (m_mpiInfo->size == 1) {
68     d0=1;
69     d1=1;
70     d2=1;
71     }
72     bool warn=false;
73 sshaw 4712
74     std::vector<int> factors;
75     int ranks = m_mpiInfo->size;
76     int epr[3] = {n0,n1,n2};
77     int d[3] = {d0,d1,d2};
78     if (d0<=0 || d1<=0 || d2<=0) {
79     for (int i = 0; i < 3; i++) {
80     if (d[i] < 1) {
81     d[i] = 1;
82     continue;
83 caltinay 3943 }
84 sshaw 4712 epr[i] = -1; // can no longer be max
85     if (ranks % d[i] != 0) {
86     throw RipleyException("Invalid number of spatial subdivisions");
87 caltinay 3943 }
88 sshaw 4712 //remove
89     ranks /= d[i];
90 caltinay 3943 }
91 sshaw 4712 factorise(factors, ranks);
92     if (factors.size() != 0) {
93     warn = true;
94 caltinay 3943 }
95 sshaw 4712 }
96     while (factors.size() > 0) {
97     int i = indexOfMax(epr[0],epr[1],epr[2]);
98     int f = factors.back();
99     factors.pop_back();
100     d[i] *= f;
101     epr[i] /= f;
102 caltinay 3943 }
103 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
104 caltinay 3943
105 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
106     // among number of ranks
107 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
108 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
109 sshaw 4712 }
110 caltinay 3943 if (warn) {
111     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113     }
114 caltinay 3691
115 caltinay 4334 double l0 = x1-x0;
116     double l1 = y1-y0;
117     double l2 = z1-z0;
118     m_dx[0] = l0/n0;
119     m_dx[1] = l1/n1;
120     m_dx[2] = l2/n2;
121    
122     if ((n0+1)%d0 > 0) {
123 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 caltinay 4334 l0=m_dx[0]*n0;
125 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
126 caltinay 4334 << n0 << ", l0=" << l0 << endl;
127 caltinay 3943 }
128 caltinay 4334 if ((n1+1)%d1 > 0) {
129 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 caltinay 4334 l1=m_dx[1]*n1;
131 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
132 caltinay 4334 << n1 << ", l1=" << l1 << endl;
133 caltinay 3943 }
134 caltinay 4334 if ((n2+1)%d2 > 0) {
135 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 caltinay 4334 l2=m_dx[2]*n2;
137 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
138 caltinay 4334 << n2 << ", l2=" << l2 << endl;
139 caltinay 3943 }
140    
141 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
143    
144 caltinay 4334 m_gNE[0] = n0;
145     m_gNE[1] = n1;
146     m_gNE[2] = n2;
147     m_origin[0] = x0;
148     m_origin[1] = y0;
149     m_origin[2] = z0;
150     m_length[0] = l0;
151     m_length[1] = l1;
152     m_length[2] = l2;
153     m_NX[0] = d0;
154     m_NX[1] = d1;
155     m_NX[2] = d2;
156    
157 caltinay 3753 // local number of elements (including overlap)
158 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160     m_NE[0]++;
161     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162     m_ownNE[0]--;
163 caltinay 3764
164 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166     m_NE[1]++;
167     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168     m_ownNE[1]--;
169 caltinay 3764
170 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172     m_NE[2]++;
173     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174     m_ownNE[2]--;
175 caltinay 3753
176     // local number of nodes
177 caltinay 4334 m_NN[0] = m_NE[0]+1;
178     m_NN[1] = m_NE[1]+1;
179     m_NN[2] = m_NE[2]+1;
180 caltinay 3753
181 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183     if (m_offset[0] > 0)
184     m_offset[0]--;
185     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186     if (m_offset[1] > 0)
187     m_offset[1]--;
188     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189     if (m_offset[2] > 0)
190     m_offset[2]--;
191 caltinay 3753
192 caltinay 3691 populateSampleIds();
193 caltinay 3756 createPattern();
194 sshaw 4622
195     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197     i != tagnamestonums.end(); i++) {
198     setTagMap(i->first, i->second);
199     }
200 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
201 caltinay 3691 }
202    
203    
204     Brick::~Brick()
205     {
206 caltinay 4800 paso::SystemMatrixPattern_free(m_pattern);
207 sshaw 4622 delete assembler;
208 caltinay 3691 }
209    
210     string Brick::getDescription() const
211     {
212     return "ripley::Brick";
213     }
214    
215     bool Brick::operator==(const AbstractDomain& other) const
216     {
217 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
218     if (o) {
219     return (RipleyDomain::operator==(other) &&
220 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
221     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
222     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
223     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
224 caltinay 3744 }
225 caltinay 3691
226     return false;
227     }
228    
229 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
230 caltinay 4618 const ReaderParameters& params) const
231 caltinay 4013 {
232     #ifdef USE_NETCDF
233     // check destination function space
234     int myN0, myN1, myN2;
235     if (out.getFunctionSpace().getTypeCode() == Nodes) {
236 caltinay 4334 myN0 = m_NN[0];
237     myN1 = m_NN[1];
238     myN2 = m_NN[2];
239 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
240     out.getFunctionSpace().getTypeCode() == ReducedElements) {
241 caltinay 4334 myN0 = m_NE[0];
242     myN1 = m_NE[1];
243     myN2 = m_NE[2];
244 caltinay 4013 } else
245     throw RipleyException("readNcGrid(): invalid function space for output data object");
246    
247 caltinay 4615 if (params.first.size() != 3)
248 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
249    
250 caltinay 4615 if (params.numValues.size() != 3)
251 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
252    
253 caltinay 4615 if (params.multiplier.size() != 3)
254 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
255 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
256     if (params.multiplier[i]<1)
257 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
258    
259 caltinay 4013 // check file existence and size
260     NcFile f(filename.c_str(), NcFile::ReadOnly);
261     if (!f.is_valid())
262     throw RipleyException("readNcGrid(): cannot open file");
263    
264     NcVar* var = f.get_var(varname.c_str());
265     if (!var)
266     throw RipleyException("readNcGrid(): invalid variable name");
267    
268     // TODO: rank>0 data support
269     const int numComp = out.getDataPointSize();
270     if (numComp > 1)
271     throw RipleyException("readNcGrid(): only scalar data supported");
272    
273     const int dims = var->num_dims();
274 caltinay 4477 boost::scoped_array<long> edges(var->edges());
275 caltinay 4013
276     // is this a slice of the data object (dims!=3)?
277     // note the expected ordering of edges (as in numpy: z,y,x)
278 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
279     params.numValues[1] > edges[1] ||
280     params.numValues[0] > edges[2]))
281     || (dims==2 && params.numValues[2]>1)
282     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
283 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
284     }
285    
286     // check if this rank contributes anything
287 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
288     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
289     params.first[1] >= m_offset[1]+myN1 ||
290     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
291     params.first[2] >= m_offset[2]+myN2 ||
292     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
293 caltinay 4013 return;
294     }
295    
296     // now determine how much this rank has to write
297    
298     // first coordinates in data object to write to
299 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
300     const int first1 = max(0, params.first[1]-m_offset[1]);
301     const int first2 = max(0, params.first[2]-m_offset[2]);
302 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
303     int idx0 = max(0, m_offset[0]-params.first[0]);
304     int idx1 = max(0, m_offset[1]-params.first[1]);
305     int idx2 = max(0, m_offset[2]-params.first[2]);
306 caltinay 4277 // number of values to read
307 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
308     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
309     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
310 caltinay 4013
311 caltinay 4618 // make sure we read the right block if going backwards through file
312     if (params.reverse[0])
313     idx0 = edges[dims-1]-num0-idx0;
314     if (dims>1 && params.reverse[1])
315     idx1 = edges[dims-2]-num1-idx1;
316     if (dims>2 && params.reverse[2])
317     idx2 = edges[dims-3]-num2-idx2;
318    
319    
320 caltinay 4013 vector<double> values(num0*num1*num2);
321     if (dims==3) {
322     var->set_cur(idx2, idx1, idx0);
323     var->get(&values[0], num2, num1, num0);
324     } else if (dims==2) {
325     var->set_cur(idx1, idx0);
326     var->get(&values[0], num1, num0);
327     } else {
328     var->set_cur(idx0);
329     var->get(&values[0], num0);
330     }
331    
332     const int dpp = out.getNumDataPointsPerSample();
333     out.requireWrite();
334    
335 caltinay 4618 // helpers for reversing
336     const int x0 = (params.reverse[0] ? num0-1 : 0);
337     const int x_mult = (params.reverse[0] ? -1 : 1);
338     const int y0 = (params.reverse[1] ? num1-1 : 0);
339     const int y_mult = (params.reverse[1] ? -1 : 1);
340     const int z0 = (params.reverse[2] ? num2-1 : 0);
341     const int z_mult = (params.reverse[2] ? -1 : 1);
342    
343 caltinay 4013 for (index_t z=0; z<num2; z++) {
344     for (index_t y=0; y<num1; y++) {
345     #pragma omp parallel for
346     for (index_t x=0; x<num0; x++) {
347 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
348     +(first1+y*params.multiplier[1])*myN0
349     +(first2+z*params.multiplier[2])*myN0*myN1;
350 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
351     +(y0+y_mult*y)*num0
352     +(x0+x_mult*x);
353 caltinay 4174 if (!isnan(values[srcIndex])) {
354 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
355     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
356     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
357 caltinay 4277 const int dataIndex = baseIndex+m0
358     +m1*myN0
359     +m2*myN0*myN1;
360     double* dest = out.getSampleDataRW(dataIndex);
361     for (index_t q=0; q<dpp; q++) {
362     *dest++ = values[srcIndex];
363     }
364     }
365     }
366 caltinay 4174 }
367 caltinay 4013 }
368     }
369     }
370     }
371     #else
372     throw RipleyException("readNcGrid(): not compiled with netCDF support");
373     #endif
374     }
375    
376 sshaw 4738 #ifdef USE_BOOSTIO
377     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
378     const ReaderParameters& params) const
379     {
380     // the mapping is not universally correct but should work on our
381     // supported platforms
382     switch (params.dataType) {
383     case DATATYPE_INT32:
384     readBinaryGridZippedImpl<int>(out, filename, params);
385     break;
386     case DATATYPE_FLOAT32:
387     readBinaryGridZippedImpl<float>(out, filename, params);
388     break;
389     case DATATYPE_FLOAT64:
390     readBinaryGridZippedImpl<double>(out, filename, params);
391     break;
392     default:
393     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
394     }
395     }
396     #endif
397    
398 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
399 caltinay 4618 const ReaderParameters& params) const
400 caltinay 4334 {
401 caltinay 4495 // the mapping is not universally correct but should work on our
402     // supported platforms
403 caltinay 4615 switch (params.dataType) {
404 caltinay 4495 case DATATYPE_INT32:
405 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
406 caltinay 4495 break;
407     case DATATYPE_FLOAT32:
408 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
409 caltinay 4495 break;
410     case DATATYPE_FLOAT64:
411 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
412 caltinay 4495 break;
413     default:
414     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
415     }
416     }
417    
418     template<typename ValueType>
419     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
420 caltinay 4618 const ReaderParameters& params) const
421 caltinay 4495 {
422 caltinay 4334 // check destination function space
423     int myN0, myN1, myN2;
424     if (out.getFunctionSpace().getTypeCode() == Nodes) {
425     myN0 = m_NN[0];
426     myN1 = m_NN[1];
427     myN2 = m_NN[2];
428     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
429     out.getFunctionSpace().getTypeCode() == ReducedElements) {
430     myN0 = m_NE[0];
431     myN1 = m_NE[1];
432     myN2 = m_NE[2];
433     } else
434     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
435    
436 caltinay 4615 if (params.first.size() != 3)
437 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
438    
439 caltinay 4615 if (params.numValues.size() != 3)
440 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
441    
442 caltinay 4615 if (params.multiplier.size() != 3)
443 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
444 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
445     if (params.multiplier[i]<1)
446 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
447    
448     // check file existence and size
449     ifstream f(filename.c_str(), ifstream::binary);
450     if (f.fail()) {
451     throw RipleyException("readBinaryGrid(): cannot open file");
452     }
453     f.seekg(0, ios::end);
454     const int numComp = out.getDataPointSize();
455     const int filesize = f.tellg();
456 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
457 caltinay 4334 if (filesize < reqsize) {
458     f.close();
459     throw RipleyException("readBinaryGrid(): not enough data in file");
460     }
461    
462     // check if this rank contributes anything
463 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
464     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
465     params.first[1] >= m_offset[1]+myN1 ||
466     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
467     params.first[2] >= m_offset[2]+myN2 ||
468     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
469 caltinay 4334 f.close();
470     return;
471     }
472    
473     // now determine how much this rank has to write
474    
475     // first coordinates in data object to write to
476 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
477     const int first1 = max(0, params.first[1]-m_offset[1]);
478     const int first2 = max(0, params.first[2]-m_offset[2]);
479 caltinay 4334 // indices to first value in file
480 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
481     const int idx1 = max(0, m_offset[1]-params.first[1]);
482     const int idx2 = max(0, m_offset[2]-params.first[2]);
483 caltinay 4334 // number of values to read
484 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
485     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
486     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
487 caltinay 4334
488     out.requireWrite();
489 caltinay 4495 vector<ValueType> values(num0*numComp);
490 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
491    
492 caltinay 4529 for (int z=0; z<num2; z++) {
493     for (int y=0; y<num1; y++) {
494 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
495     +(idx2+z)*params.numValues[0]*params.numValues[1]);
496 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
497     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
498 caltinay 4334
499 caltinay 4529 for (int x=0; x<num0; x++) {
500 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
501     +(first1+y*params.multiplier[1])*myN0
502     +(first2+z*params.multiplier[2])*myN0*myN1;
503     for (int m2=0; m2<params.multiplier[2]; m2++) {
504     for (int m1=0; m1<params.multiplier[1]; m1++) {
505     for (int m0=0; m0<params.multiplier[0]; m0++) {
506 caltinay 4334 const int dataIndex = baseIndex+m0
507     +m1*myN0
508     +m2*myN0*myN1;
509     double* dest = out.getSampleDataRW(dataIndex);
510 caltinay 4529 for (int c=0; c<numComp; c++) {
511     ValueType val = values[x*numComp+c];
512    
513 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
514 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
515     // this will alter val!!
516     byte_swap32(cval);
517     }
518     if (!std::isnan(val)) {
519     for (int q=0; q<dpp; q++) {
520     *dest++ = static_cast<double>(val);
521 caltinay 4334 }
522     }
523     }
524     }
525     }
526     }
527     }
528     }
529     }
530    
531     f.close();
532     }
533    
534 sshaw 4738 #ifdef USE_BOOSTIO
535     template<typename ValueType>
536     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
537     const ReaderParameters& params) const
538     {
539     // check destination function space
540     int myN0, myN1, myN2;
541     if (out.getFunctionSpace().getTypeCode() == Nodes) {
542     myN0 = m_NN[0];
543     myN1 = m_NN[1];
544     myN2 = m_NN[2];
545     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
546     out.getFunctionSpace().getTypeCode() == ReducedElements) {
547     myN0 = m_NE[0];
548     myN1 = m_NE[1];
549     myN2 = m_NE[2];
550     } else
551     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
552    
553     if (params.first.size() != 3)
554     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
555    
556     if (params.numValues.size() != 3)
557     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
558    
559     if (params.multiplier.size() != 3)
560     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
561     for (size_t i=0; i<params.multiplier.size(); i++)
562     if (params.multiplier[i]<1)
563     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
564    
565     // check file existence and size
566     ifstream f(filename.c_str(), ifstream::binary);
567     if (f.fail()) {
568     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
569     }
570     f.seekg(0, ios::end);
571     const int numComp = out.getDataPointSize();
572     int filesize = f.tellg();
573     f.seekg(0, ios::beg);
574     std::vector<char> compressed(filesize);
575     f.read((char*)&compressed[0], filesize);
576     f.close();
577     std::vector<char> decompressed = unzip(compressed);
578     filesize = decompressed.size();
579     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
580     if (filesize < reqsize) {
581     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
582     }
583    
584     // check if this rank contributes anything
585     if (params.first[0] >= m_offset[0]+myN0 ||
586     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
587     params.first[1] >= m_offset[1]+myN1 ||
588     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
589     params.first[2] >= m_offset[2]+myN2 ||
590     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
591     return;
592     }
593    
594     // now determine how much this rank has to write
595    
596     // first coordinates in data object to write to
597     const int first0 = max(0, params.first[0]-m_offset[0]);
598     const int first1 = max(0, params.first[1]-m_offset[1]);
599     const int first2 = max(0, params.first[2]-m_offset[2]);
600     // indices to first value in file
601     const int idx0 = max(0, m_offset[0]-params.first[0]);
602     const int idx1 = max(0, m_offset[1]-params.first[1]);
603     const int idx2 = max(0, m_offset[2]-params.first[2]);
604     // number of values to read
605     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
606     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
607     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
608    
609     out.requireWrite();
610     vector<ValueType> values(num0*numComp);
611     const int dpp = out.getNumDataPointsPerSample();
612    
613     for (int z=0; z<num2; z++) {
614     for (int y=0; y<num1; y++) {
615     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
616     +(idx2+z)*params.numValues[0]*params.numValues[1]);
617     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
618    
619     for (int x=0; x<num0; x++) {
620     const int baseIndex = first0+x*params.multiplier[0]
621     +(first1+y*params.multiplier[1])*myN0
622     +(first2+z*params.multiplier[2])*myN0*myN1;
623     for (int m2=0; m2<params.multiplier[2]; m2++) {
624     for (int m1=0; m1<params.multiplier[1]; m1++) {
625     for (int m0=0; m0<params.multiplier[0]; m0++) {
626     const int dataIndex = baseIndex+m0
627     +m1*myN0
628     +m2*myN0*myN1;
629     double* dest = out.getSampleDataRW(dataIndex);
630     for (int c=0; c<numComp; c++) {
631     ValueType val = values[x*numComp+c];
632    
633     if (params.byteOrder != BYTEORDER_NATIVE) {
634     char* cval = reinterpret_cast<char*>(&val);
635     // this will alter val!!
636     byte_swap32(cval);
637     }
638     if (!std::isnan(val)) {
639     for (int q=0; q<dpp; q++) {
640     *dest++ = static_cast<double>(val);
641     }
642     }
643     }
644     }
645     }
646     }
647     }
648     }
649     }
650     }
651     #endif
652    
653 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
654     int byteOrder, int dataType) const
655 caltinay 4334 {
656 caltinay 4357 // the mapping is not universally correct but should work on our
657     // supported platforms
658     switch (dataType) {
659     case DATATYPE_INT32:
660     writeBinaryGridImpl<int>(in, filename, byteOrder);
661     break;
662     case DATATYPE_FLOAT32:
663     writeBinaryGridImpl<float>(in, filename, byteOrder);
664     break;
665     case DATATYPE_FLOAT64:
666     writeBinaryGridImpl<double>(in, filename, byteOrder);
667     break;
668     default:
669     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
670     }
671     }
672    
673     template<typename ValueType>
674     void Brick::writeBinaryGridImpl(const escript::Data& in,
675     const string& filename, int byteOrder) const
676     {
677 caltinay 4334 // check function space and determine number of points
678     int myN0, myN1, myN2;
679     int totalN0, totalN1, totalN2;
680     if (in.getFunctionSpace().getTypeCode() == Nodes) {
681     myN0 = m_NN[0];
682     myN1 = m_NN[1];
683     myN2 = m_NN[2];
684     totalN0 = m_gNE[0]+1;
685     totalN1 = m_gNE[1]+1;
686     totalN2 = m_gNE[2]+1;
687     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
688     in.getFunctionSpace().getTypeCode() == ReducedElements) {
689     myN0 = m_NE[0];
690     myN1 = m_NE[1];
691     myN2 = m_NE[2];
692     totalN0 = m_gNE[0];
693     totalN1 = m_gNE[1];
694     totalN2 = m_gNE[2];
695     } else
696     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
697    
698     const int numComp = in.getDataPointSize();
699     const int dpp = in.getNumDataPointsPerSample();
700 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
701 caltinay 4334
702     if (numComp > 1 || dpp > 1)
703     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
704    
705     // from here on we know that each sample consists of one value
706 caltinay 4482 FileWriter fw;
707     fw.openFile(filename, fileSize);
708 caltinay 4334 MPIBarrier();
709    
710     for (index_t z=0; z<myN2; z++) {
711     for (index_t y=0; y<myN1; y++) {
712     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
713 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
714 caltinay 4334 ostringstream oss;
715    
716     for (index_t x=0; x<myN0; x++) {
717 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
718 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
719     if (byteOrder == BYTEORDER_NATIVE) {
720 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
721     } else {
722     char* value = reinterpret_cast<char*>(&fvalue);
723 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
724 caltinay 4334 }
725     }
726 caltinay 4482 fw.writeAt(oss, fileofs);
727 caltinay 4334 }
728     }
729 caltinay 4482 fw.close();
730 caltinay 4334 }
731    
732 caltinay 3691 void Brick::dump(const string& fileName) const
733     {
734     #if USE_SILO
735     string fn(fileName);
736     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
737     fn+=".silo";
738     }
739    
740     int driver=DB_HDF5;
741     string siloPath;
742     DBfile* dbfile = NULL;
743    
744     #ifdef ESYS_MPI
745     PMPIO_baton_t* baton = NULL;
746 gross 3793 const int NUM_SILO_FILES = 1;
747     const char* blockDirFmt = "/block%04d";
748 caltinay 3691 #endif
749    
750     if (m_mpiInfo->size > 1) {
751     #ifdef ESYS_MPI
752     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
753     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
754     PMPIO_DefaultClose, (void*)&driver);
755     // try the fallback driver in case of error
756     if (!baton && driver != DB_PDB) {
757     driver = DB_PDB;
758     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
759     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
760     PMPIO_DefaultClose, (void*)&driver);
761     }
762     if (baton) {
763     char str[64];
764     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
765     siloPath = str;
766     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
767     }
768     #endif
769     } else {
770     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
771     getDescription().c_str(), driver);
772     // try the fallback driver in case of error
773     if (!dbfile && driver != DB_PDB) {
774     driver = DB_PDB;
775     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
776     getDescription().c_str(), driver);
777     }
778     }
779    
780     if (!dbfile)
781     throw RipleyException("dump: Could not create Silo file");
782    
783     /*
784     if (driver==DB_HDF5) {
785     // gzip level 1 already provides good compression with minimal
786     // performance penalty. Some tests showed that gzip levels >3 performed
787     // rather badly on escript data both in terms of time and space
788     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
789     }
790     */
791    
792 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
793     boost::scoped_ptr<double> y(new double[m_NN[1]]);
794     boost::scoped_ptr<double> z(new double[m_NN[2]]);
795 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
796     #pragma omp parallel
797     {
798     #pragma omp for
799 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
800     coords[0][i0]=getLocalCoordinate(i0, 0);
801 caltinay 3691 }
802     #pragma omp for
803 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
804     coords[1][i1]=getLocalCoordinate(i1, 1);
805 caltinay 3691 }
806     #pragma omp for
807 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
808     coords[2][i2]=getLocalCoordinate(i2, 2);
809 caltinay 3691 }
810     }
811 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
812    
813     // write mesh
814     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
815 caltinay 3691 DB_COLLINEAR, NULL);
816    
817 caltinay 4334 // write node ids
818     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
819 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
820 caltinay 3691
821 caltinay 3698 // write element ids
822 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
823 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
824 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
825 caltinay 3698
826     // rank 0 writes multimesh and multivar
827 caltinay 3691 if (m_mpiInfo->rank == 0) {
828     vector<string> tempstrings;
829     vector<char*> names;
830     for (dim_t i=0; i<m_mpiInfo->size; i++) {
831     stringstream path;
832     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
833     tempstrings.push_back(path.str());
834     names.push_back((char*)tempstrings.back().c_str());
835     }
836     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
837     DBSetDir(dbfile, "/");
838     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
839     &types[0], NULL);
840     tempstrings.clear();
841     names.clear();
842     for (dim_t i=0; i<m_mpiInfo->size; i++) {
843     stringstream path;
844     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
845     tempstrings.push_back(path.str());
846     names.push_back((char*)tempstrings.back().c_str());
847     }
848     types.assign(m_mpiInfo->size, DB_QUADVAR);
849     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
850     &types[0], NULL);
851 caltinay 3698 tempstrings.clear();
852     names.clear();
853     for (dim_t i=0; i<m_mpiInfo->size; i++) {
854     stringstream path;
855     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
856     tempstrings.push_back(path.str());
857     names.push_back((char*)tempstrings.back().c_str());
858     }
859     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
860     &types[0], NULL);
861 caltinay 3691 }
862    
863     if (m_mpiInfo->size > 1) {
864     #ifdef ESYS_MPI
865     PMPIO_HandOffBaton(baton, dbfile);
866     PMPIO_Finish(baton);
867     #endif
868     } else {
869     DBClose(dbfile);
870     }
871    
872     #else // USE_SILO
873 caltinay 3791 throw RipleyException("dump: no Silo support");
874 caltinay 3691 #endif
875     }
876    
877     const int* Brick::borrowSampleReferenceIDs(int fsType) const
878     {
879 caltinay 3697 switch (fsType) {
880     case Nodes:
881 caltinay 3748 case ReducedNodes: //FIXME: reduced
882 caltinay 3753 return &m_nodeId[0];
883 caltinay 3757 case DegreesOfFreedom:
884     case ReducedDegreesOfFreedom: //FIXME: reduced
885 caltinay 3753 return &m_dofId[0];
886 caltinay 3697 case Elements:
887 caltinay 3733 case ReducedElements:
888 caltinay 3697 return &m_elementId[0];
889 caltinay 3757 case FaceElements:
890 caltinay 3733 case ReducedFaceElements:
891 caltinay 3697 return &m_faceId[0];
892 sshaw 4660 case Points:
893     return &m_diracPointNodeIDs[0];
894 caltinay 3697 default:
895     break;
896     }
897 caltinay 3691
898 caltinay 3697 stringstream msg;
899 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
900 caltinay 3697 throw RipleyException(msg.str());
901 caltinay 3691 }
902    
903 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
904 caltinay 3691 {
905 caltinay 3759 if (getMPISize()==1)
906     return true;
907    
908 caltinay 3757 switch (fsType) {
909     case Nodes:
910     case ReducedNodes: //FIXME: reduced
911     return (m_dofMap[id] < getNumDOF());
912     case DegreesOfFreedom:
913     case ReducedDegreesOfFreedom:
914     return true;
915     case Elements:
916     case ReducedElements:
917     {
918     // check ownership of element's _last_ node
919 caltinay 4334 const index_t x=id%m_NE[0] + 1;
920     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
921     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
922     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
923 caltinay 3757 }
924     case FaceElements:
925     case ReducedFaceElements:
926 caltinay 3759 {
927     // check ownership of face element's last node
928     dim_t n=0;
929 caltinay 4334 for (size_t i=0; i<6; i++) {
930     n+=m_faceCount[i];
931 caltinay 3759 if (id<n) {
932 caltinay 4334 const index_t j=id-n+m_faceCount[i];
933 caltinay 3759 if (i>=4) { // front or back
934 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
935     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
936 caltinay 3759 } else if (i>=2) { // bottom or top
937 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
938     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
939 caltinay 3759 } else { // left or right
940 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
941     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
942 caltinay 3759 }
943     }
944     }
945     return false;
946     }
947 caltinay 3757 default:
948     break;
949 caltinay 3753 }
950 caltinay 3757
951     stringstream msg;
952 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
953 caltinay 3757 throw RipleyException(msg.str());
954 caltinay 3691 }
955    
956 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
957 caltinay 3703 {
958 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
959     out.requireWrite();
960     #pragma omp parallel
961     {
962     if (m_faceOffset[0] > -1) {
963     #pragma omp for nowait
964 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
965     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
966     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
967 caltinay 3764 // set vector at four quadrature points
968     *o++ = -1.; *o++ = 0.; *o++ = 0.;
969     *o++ = -1.; *o++ = 0.; *o++ = 0.;
970     *o++ = -1.; *o++ = 0.; *o++ = 0.;
971     *o++ = -1.; *o++ = 0.; *o = 0.;
972     }
973     }
974     }
975    
976     if (m_faceOffset[1] > -1) {
977     #pragma omp for nowait
978 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
979     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
980     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
981 caltinay 3764 // set vector at four quadrature points
982     *o++ = 1.; *o++ = 0.; *o++ = 0.;
983     *o++ = 1.; *o++ = 0.; *o++ = 0.;
984     *o++ = 1.; *o++ = 0.; *o++ = 0.;
985     *o++ = 1.; *o++ = 0.; *o = 0.;
986     }
987     }
988     }
989    
990     if (m_faceOffset[2] > -1) {
991     #pragma omp for nowait
992 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
993     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
994     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
995 caltinay 3764 // set vector at four quadrature points
996     *o++ = 0.; *o++ = -1.; *o++ = 0.;
997     *o++ = 0.; *o++ = -1.; *o++ = 0.;
998     *o++ = 0.; *o++ = -1.; *o++ = 0.;
999     *o++ = 0.; *o++ = -1.; *o = 0.;
1000     }
1001     }
1002     }
1003    
1004     if (m_faceOffset[3] > -1) {
1005     #pragma omp for nowait
1006 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1007     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1008     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1009 caltinay 3764 // set vector at four quadrature points
1010     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1011     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1012     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1013     *o++ = 0.; *o++ = 1.; *o = 0.;
1014     }
1015     }
1016     }
1017    
1018     if (m_faceOffset[4] > -1) {
1019     #pragma omp for nowait
1020 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1021     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1022     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1023 caltinay 3764 // set vector at four quadrature points
1024     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1025     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1026     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1027     *o++ = 0.; *o++ = 0.; *o = -1.;
1028     }
1029     }
1030     }
1031    
1032     if (m_faceOffset[5] > -1) {
1033     #pragma omp for nowait
1034 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1035     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1036     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1037 caltinay 3764 // set vector at four quadrature points
1038     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1039     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1040     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1041     *o++ = 0.; *o++ = 0.; *o = 1.;
1042     }
1043     }
1044     }
1045     } // end of parallel section
1046     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1047     out.requireWrite();
1048     #pragma omp parallel
1049     {
1050     if (m_faceOffset[0] > -1) {
1051     #pragma omp for nowait
1052 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1053     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1054     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1055 caltinay 3764 *o++ = -1.;
1056     *o++ = 0.;
1057     *o = 0.;
1058     }
1059     }
1060     }
1061    
1062     if (m_faceOffset[1] > -1) {
1063     #pragma omp for nowait
1064 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1065     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1066     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1067 caltinay 3764 *o++ = 1.;
1068     *o++ = 0.;
1069     *o = 0.;
1070     }
1071     }
1072     }
1073    
1074     if (m_faceOffset[2] > -1) {
1075     #pragma omp for nowait
1076 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1077     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1078     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1079 caltinay 3764 *o++ = 0.;
1080     *o++ = -1.;
1081     *o = 0.;
1082     }
1083     }
1084     }
1085    
1086     if (m_faceOffset[3] > -1) {
1087     #pragma omp for nowait
1088 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1089     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1090     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1091 caltinay 3764 *o++ = 0.;
1092     *o++ = 1.;
1093     *o = 0.;
1094     }
1095     }
1096     }
1097    
1098     if (m_faceOffset[4] > -1) {
1099     #pragma omp for nowait
1100 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1101     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1102     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1103 caltinay 3764 *o++ = 0.;
1104     *o++ = 0.;
1105     *o = -1.;
1106     }
1107     }
1108     }
1109    
1110     if (m_faceOffset[5] > -1) {
1111     #pragma omp for nowait
1112 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1113     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1114     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1115 caltinay 3764 *o++ = 0.;
1116     *o++ = 0.;
1117     *o = 1.;
1118     }
1119     }
1120     }
1121     } // end of parallel section
1122    
1123     } else {
1124     stringstream msg;
1125 caltinay 3791 msg << "setToNormal: invalid function space type "
1126     << out.getFunctionSpace().getTypeCode();
1127 caltinay 3764 throw RipleyException(msg.str());
1128     }
1129     }
1130    
1131     void Brick::setToSize(escript::Data& out) const
1132     {
1133     if (out.getFunctionSpace().getTypeCode() == Elements
1134     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1135     out.requireWrite();
1136     const dim_t numQuad=out.getNumDataPointsPerSample();
1137 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1138 caltinay 3764 #pragma omp parallel for
1139     for (index_t k = 0; k < getNumElements(); ++k) {
1140     double* o = out.getSampleDataRW(k);
1141     fill(o, o+numQuad, size);
1142     }
1143     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1144     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1145     out.requireWrite();
1146     const dim_t numQuad=out.getNumDataPointsPerSample();
1147     #pragma omp parallel
1148     {
1149     if (m_faceOffset[0] > -1) {
1150 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1151 caltinay 3764 #pragma omp for nowait
1152 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1153     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1154     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1155 caltinay 3764 fill(o, o+numQuad, size);
1156     }
1157     }
1158     }
1159    
1160     if (m_faceOffset[1] > -1) {
1161 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1162 caltinay 3764 #pragma omp for nowait
1163 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1164     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1165     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1166 caltinay 3764 fill(o, o+numQuad, size);
1167     }
1168     }
1169     }
1170    
1171     if (m_faceOffset[2] > -1) {
1172 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1173 caltinay 3764 #pragma omp for nowait
1174 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1175     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1176     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1177 caltinay 3764 fill(o, o+numQuad, size);
1178     }
1179     }
1180     }
1181    
1182     if (m_faceOffset[3] > -1) {
1183 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1184 caltinay 3764 #pragma omp for nowait
1185 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1186     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1187     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1188 caltinay 3764 fill(o, o+numQuad, size);
1189     }
1190     }
1191     }
1192    
1193     if (m_faceOffset[4] > -1) {
1194 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1195 caltinay 3764 #pragma omp for nowait
1196 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1197     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1198     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1199 caltinay 3764 fill(o, o+numQuad, size);
1200     }
1201     }
1202     }
1203    
1204     if (m_faceOffset[5] > -1) {
1205 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1206 caltinay 3764 #pragma omp for nowait
1207 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1208     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1209     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1210 caltinay 3764 fill(o, o+numQuad, size);
1211     }
1212     }
1213     }
1214     } // end of parallel section
1215    
1216     } else {
1217     stringstream msg;
1218 caltinay 3791 msg << "setToSize: invalid function space type "
1219     << out.getFunctionSpace().getTypeCode();
1220 caltinay 3764 throw RipleyException(msg.str());
1221     }
1222     }
1223    
1224     void Brick::Print_Mesh_Info(const bool full) const
1225     {
1226     RipleyDomain::Print_Mesh_Info(full);
1227     if (full) {
1228     cout << " Id Coordinates" << endl;
1229     cout.precision(15);
1230     cout.setf(ios::scientific, ios::floatfield);
1231     for (index_t i=0; i < getNumNodes(); i++) {
1232     cout << " " << setw(5) << m_nodeId[i]
1233 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1234     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1235     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1236 caltinay 3764 }
1237     }
1238     }
1239    
1240    
1241     //protected
1242     void Brick::assembleCoordinates(escript::Data& arg) const
1243     {
1244     escriptDataC x = arg.getDataC();
1245     int numDim = m_numDim;
1246     if (!isDataPointShapeEqual(&x, 1, &numDim))
1247     throw RipleyException("setToX: Invalid Data object shape");
1248     if (!numSamplesEqual(&x, 1, getNumNodes()))
1249     throw RipleyException("setToX: Illegal number of samples in Data object");
1250    
1251     arg.requireWrite();
1252     #pragma omp parallel for
1253 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1254     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1255     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1256     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1257     point[0] = getLocalCoordinate(i0, 0);
1258     point[1] = getLocalCoordinate(i1, 1);
1259     point[2] = getLocalCoordinate(i2, 2);
1260 caltinay 3764 }
1261     }
1262     }
1263     }
1264    
1265     //protected
1266 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1267 caltinay 3764 {
1268 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1269 caltinay 3731 const double C0 = .044658198738520451079;
1270     const double C1 = .16666666666666666667;
1271     const double C2 = .21132486540518711775;
1272     const double C3 = .25;
1273     const double C4 = .5;
1274     const double C5 = .62200846792814621559;
1275     const double C6 = .78867513459481288225;
1276    
1277 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1278 caltinay 3760 out.requireWrite();
1279 caltinay 3913 #pragma omp parallel
1280     {
1281     vector<double> f_000(numComp);
1282     vector<double> f_001(numComp);
1283     vector<double> f_010(numComp);
1284     vector<double> f_011(numComp);
1285     vector<double> f_100(numComp);
1286     vector<double> f_101(numComp);
1287     vector<double> f_110(numComp);
1288     vector<double> f_111(numComp);
1289     #pragma omp for
1290 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1291     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1292     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1293     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1302 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1303 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1305     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1308     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1309     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1310     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1311     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1312     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1313     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1314     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1315 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1316     o[INDEX3(i,1,0,numComp,3)] = V4;
1317     o[INDEX3(i,2,0,numComp,3)] = V8;
1318     o[INDEX3(i,0,1,numComp,3)] = V0;
1319     o[INDEX3(i,1,1,numComp,3)] = V5;
1320     o[INDEX3(i,2,1,numComp,3)] = V9;
1321     o[INDEX3(i,0,2,numComp,3)] = V1;
1322     o[INDEX3(i,1,2,numComp,3)] = V4;
1323     o[INDEX3(i,2,2,numComp,3)] = V10;
1324     o[INDEX3(i,0,3,numComp,3)] = V1;
1325     o[INDEX3(i,1,3,numComp,3)] = V5;
1326     o[INDEX3(i,2,3,numComp,3)] = V11;
1327     o[INDEX3(i,0,4,numComp,3)] = V2;
1328     o[INDEX3(i,1,4,numComp,3)] = V6;
1329     o[INDEX3(i,2,4,numComp,3)] = V8;
1330     o[INDEX3(i,0,5,numComp,3)] = V2;
1331     o[INDEX3(i,1,5,numComp,3)] = V7;
1332     o[INDEX3(i,2,5,numComp,3)] = V9;
1333     o[INDEX3(i,0,6,numComp,3)] = V3;
1334     o[INDEX3(i,1,6,numComp,3)] = V6;
1335     o[INDEX3(i,2,6,numComp,3)] = V10;
1336     o[INDEX3(i,0,7,numComp,3)] = V3;
1337     o[INDEX3(i,1,7,numComp,3)] = V7;
1338     o[INDEX3(i,2,7,numComp,3)] = V11;
1339     } // end of component loop i
1340     } // end of k0 loop
1341     } // end of k1 loop
1342     } // end of k2 loop
1343     } // end of parallel section
1344 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1345 caltinay 3760 out.requireWrite();
1346 caltinay 3913 #pragma omp parallel
1347     {
1348     vector<double> f_000(numComp);
1349     vector<double> f_001(numComp);
1350     vector<double> f_010(numComp);
1351     vector<double> f_011(numComp);
1352     vector<double> f_100(numComp);
1353     vector<double> f_101(numComp);
1354     vector<double> f_110(numComp);
1355     vector<double> f_111(numComp);
1356     #pragma omp for
1357 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1358     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1359     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1360     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1369 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1370 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1371     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1372     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1373 caltinay 3913 } // end of component loop i
1374     } // end of k0 loop
1375     } // end of k1 loop
1376     } // end of k2 loop
1377     } // end of parallel section
1378 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1379 caltinay 3760 out.requireWrite();
1380 caltinay 3722 #pragma omp parallel
1381     {
1382 caltinay 3913 vector<double> f_000(numComp);
1383     vector<double> f_001(numComp);
1384     vector<double> f_010(numComp);
1385     vector<double> f_011(numComp);
1386     vector<double> f_100(numComp);
1387     vector<double> f_101(numComp);
1388     vector<double> f_110(numComp);
1389     vector<double> f_111(numComp);
1390 caltinay 3722 if (m_faceOffset[0] > -1) {
1391     #pragma omp for nowait
1392 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1393     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1394     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1403 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1404 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1405     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1406     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1407     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1408     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1409 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1410     o[INDEX3(i,2,0,numComp,3)] = V2;
1411 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1412 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1413     o[INDEX3(i,2,1,numComp,3)] = V3;
1414 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1415 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1416     o[INDEX3(i,2,2,numComp,3)] = V2;
1417 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1418 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1419     o[INDEX3(i,2,3,numComp,3)] = V3;
1420 caltinay 3764 } // end of component loop i
1421     } // end of k1 loop
1422     } // end of k2 loop
1423     } // end of face 0
1424 caltinay 3722 if (m_faceOffset[1] > -1) {
1425     #pragma omp for nowait
1426 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1427     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1428     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1437 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1438 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1439     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1440     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1441     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1442     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1443 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1444     o[INDEX3(i,2,0,numComp,3)] = V2;
1445 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1446 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1447     o[INDEX3(i,2,1,numComp,3)] = V3;
1448 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1449 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1450     o[INDEX3(i,2,2,numComp,3)] = V2;
1451 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1452 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1453     o[INDEX3(i,2,3,numComp,3)] = V3;
1454 caltinay 3764 } // end of component loop i
1455     } // end of k1 loop
1456     } // end of k2 loop
1457     } // end of face 1
1458 caltinay 3722 if (m_faceOffset[2] > -1) {
1459     #pragma omp for nowait
1460 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1461     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1462     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1471 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1472 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1473     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1474     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1475 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1476 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1477 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1478     o[INDEX3(i,0,1,numComp,3)] = V0;
1479 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1480 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1481     o[INDEX3(i,0,2,numComp,3)] = V0;
1482 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1483 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1484     o[INDEX3(i,0,3,numComp,3)] = V0;
1485 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1486 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1487 caltinay 3764 } // end of component loop i
1488     } // end of k0 loop
1489     } // end of k2 loop
1490     } // end of face 2
1491 caltinay 3722 if (m_faceOffset[3] > -1) {
1492     #pragma omp for nowait
1493 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1494     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1495     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1496     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1504 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1505 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1506     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1507     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1508     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1509 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1510 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1511 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1512     o[INDEX3(i,0,1,numComp,3)] = V0;
1513 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1514 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1515     o[INDEX3(i,0,2,numComp,3)] = V1;
1516 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1517 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1518     o[INDEX3(i,0,3,numComp,3)] = V1;
1519 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1520 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1521 caltinay 3764 } // end of component loop i
1522     } // end of k0 loop
1523     } // end of k2 loop
1524     } // end of face 3
1525 caltinay 3722 if (m_faceOffset[4] > -1) {
1526     #pragma omp for nowait
1527 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1528     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1529     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1538 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1539 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1540     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1541     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1542     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1543 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1544     o[INDEX3(i,1,0,numComp,3)] = V2;
1545 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1546 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1547     o[INDEX3(i,1,1,numComp,3)] = V3;
1548 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1549 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1550     o[INDEX3(i,1,2,numComp,3)] = V2;
1551 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1552 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1553     o[INDEX3(i,1,3,numComp,3)] = V3;
1554 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1555 caltinay 3764 } // end of component loop i
1556     } // end of k0 loop
1557     } // end of k1 loop
1558     } // end of face 4
1559 caltinay 3722 if (m_faceOffset[5] > -1) {
1560     #pragma omp for nowait
1561 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1562     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1563     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1564     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1572 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1573 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1574     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1575     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1576     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1577 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1578     o[INDEX3(i,1,0,numComp,3)] = V2;
1579 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1580 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1581     o[INDEX3(i,1,1,numComp,3)] = V3;
1582 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1583 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1584     o[INDEX3(i,1,2,numComp,3)] = V2;
1585 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1586 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1587     o[INDEX3(i,1,3,numComp,3)] = V3;
1588 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1589 caltinay 3764