/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5136 - (hide annotations)
Tue Sep 9 07:13:55 2014 UTC (4 years, 7 months ago) by caltinay
File size: 169249 byte(s)
ripley now supports paso solvers again and returns an appropriate matrix type
id. Changed the getSystemMatrixTypeId() method to take a full SolverBuddy
instance and made some other simplifications.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
19 caltinay 5122 #include <ripley/LameAssembler3D.h>
20 sshaw 4629 #include <ripley/WaveAssembler3D.h>
21 caltinay 5084 #include <ripley/blocktools.h>
22 sshaw 4712 #include <ripley/domainhelpers.h>
23 caltinay 5084 #include <esysUtils/esysFileWriter.h>
24     #include <esysUtils/EsysRandom.h>
25     #include <paso/SystemMatrix.h>
26    
27 caltinay 4477 #include <boost/scoped_array.hpp>
28    
29 caltinay 4013 #ifdef USE_NETCDF
30     #include <netcdfcpp.h>
31     #endif
32    
33 caltinay 3691 #if USE_SILO
34     #include <silo.h>
35     #ifdef ESYS_MPI
36     #include <pmpio.h>
37     #endif
38     #endif
39    
40     #include <iomanip>
41 caltinay 5084 #include <limits>
42 caltinay 3691
43 caltinay 5122 namespace bp = boost::python;
44 caltinay 3691 using namespace std;
45 caltinay 4334 using esysUtils::FileWriter;
46 caltinay 5122 using escript::AbstractSystemMatrix;
47 caltinay 3691
48     namespace ripley {
49    
50 caltinay 5122 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
51     {
52 sshaw 4712 if (a > b) {
53     if (c > a) {
54     return 2;
55     }
56     return 0;
57     } else if (b > c) {
58     return 1;
59     }
60     return 2;
61     }
62    
63 caltinay 5122 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
64 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
65 caltinay 5122 const vector<double>& points, const vector<int>& tags,
66 caltinay 5136 const TagMap& tagnamestonums,
67 jfenwick 4934 escript::SubWorld_ptr w) :
68     RipleyDomain(3, w)
69 caltinay 3691 {
70 caltinay 5122 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
71     * static_cast<long>(n2 + 1) > numeric_limits<int>::max())
72 sshaw 4861 throw RipleyException("The number of elements has overflowed, this "
73     "limit may be raised in future releases.");
74    
75 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
76     throw RipleyException("Number of elements in each spatial dimension "
77     "must be positive");
78    
79 caltinay 3943 // ignore subdivision parameters for serial run
80     if (m_mpiInfo->size == 1) {
81     d0=1;
82     d1=1;
83     d2=1;
84     }
85     bool warn=false;
86 sshaw 4712
87 caltinay 5122 vector<int> factors;
88 sshaw 4712 int ranks = m_mpiInfo->size;
89 caltinay 5122 dim_t epr[3] = {n0,n1,n2};
90 sshaw 4712 int d[3] = {d0,d1,d2};
91     if (d0<=0 || d1<=0 || d2<=0) {
92     for (int i = 0; i < 3; i++) {
93     if (d[i] < 1) {
94     d[i] = 1;
95     continue;
96 caltinay 3943 }
97 sshaw 4712 epr[i] = -1; // can no longer be max
98     if (ranks % d[i] != 0) {
99     throw RipleyException("Invalid number of spatial subdivisions");
100 caltinay 3943 }
101 caltinay 5122 //remove
102 sshaw 4712 ranks /= d[i];
103 caltinay 3943 }
104 sshaw 4712 factorise(factors, ranks);
105     if (factors.size() != 0) {
106     warn = true;
107 caltinay 3943 }
108 caltinay 5122 }
109 sshaw 4712 while (factors.size() > 0) {
110     int i = indexOfMax(epr[0],epr[1],epr[2]);
111     int f = factors.back();
112     factors.pop_back();
113     d[i] *= f;
114     epr[i] /= f;
115 caltinay 3943 }
116 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
117 caltinay 3943
118 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
119     // among number of ranks
120 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
121 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
122 sshaw 4712 }
123 caltinay 3943 if (warn) {
124     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
125     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
126     }
127 caltinay 3691
128 caltinay 4334 double l0 = x1-x0;
129     double l1 = y1-y0;
130     double l2 = z1-z0;
131     m_dx[0] = l0/n0;
132     m_dx[1] = l1/n1;
133     m_dx[2] = l2/n2;
134    
135     if ((n0+1)%d0 > 0) {
136 caltinay 5122 n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
137 caltinay 4334 l0=m_dx[0]*n0;
138 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
139 caltinay 4334 << n0 << ", l0=" << l0 << endl;
140 caltinay 3943 }
141 caltinay 4334 if ((n1+1)%d1 > 0) {
142 caltinay 5122 n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
143 caltinay 4334 l1=m_dx[1]*n1;
144 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
145 caltinay 4334 << n1 << ", l1=" << l1 << endl;
146 caltinay 3943 }
147 caltinay 4334 if ((n2+1)%d2 > 0) {
148 caltinay 5122 n2=(dim_t)round((float)(n2+1)/d2+0.5)*d2-1;
149 caltinay 4334 l2=m_dx[2]*n2;
150 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
151 caltinay 4334 << n2 << ", l2=" << l2 << endl;
152 caltinay 3943 }
153    
154 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
155 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
156    
157 caltinay 4334 m_gNE[0] = n0;
158     m_gNE[1] = n1;
159     m_gNE[2] = n2;
160     m_origin[0] = x0;
161     m_origin[1] = y0;
162     m_origin[2] = z0;
163     m_length[0] = l0;
164     m_length[1] = l1;
165     m_length[2] = l2;
166     m_NX[0] = d0;
167     m_NX[1] = d1;
168     m_NX[2] = d2;
169    
170 caltinay 3753 // local number of elements (including overlap)
171 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
172     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
173     m_NE[0]++;
174     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
175     m_ownNE[0]--;
176 caltinay 3764
177 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
178     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
179     m_NE[1]++;
180     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
181     m_ownNE[1]--;
182 caltinay 3764
183 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
184     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
185     m_NE[2]++;
186     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
187     m_ownNE[2]--;
188 caltinay 3753
189     // local number of nodes
190 caltinay 4334 m_NN[0] = m_NE[0]+1;
191     m_NN[1] = m_NE[1]+1;
192     m_NN[2] = m_NE[2]+1;
193 caltinay 3753
194 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
195 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
196     if (m_offset[0] > 0)
197     m_offset[0]--;
198     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
199     if (m_offset[1] > 0)
200     m_offset[1]--;
201     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
202     if (m_offset[2] > 0)
203     m_offset[2]--;
204 caltinay 3753
205 caltinay 3691 populateSampleIds();
206 caltinay 5122
207 caltinay 5136 for (TagMap::const_iterator i = tagnamestonums.begin();
208 sshaw 4629 i != tagnamestonums.end(); i++) {
209     setTagMap(i->first, i->second);
210     }
211 caltinay 5122 addPoints(points, tags);
212 caltinay 3691 }
213    
214     Brick::~Brick()
215     {
216     }
217    
218     string Brick::getDescription() const
219     {
220     return "ripley::Brick";
221     }
222    
223     bool Brick::operator==(const AbstractDomain& other) const
224     {
225 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
226     if (o) {
227     return (RipleyDomain::operator==(other) &&
228 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
229     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
230     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
231     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
232 caltinay 3744 }
233 caltinay 3691
234     return false;
235     }
236    
237 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
238 caltinay 4618 const ReaderParameters& params) const
239 caltinay 4013 {
240     #ifdef USE_NETCDF
241     // check destination function space
242 caltinay 5122 dim_t myN0, myN1, myN2;
243 caltinay 4013 if (out.getFunctionSpace().getTypeCode() == Nodes) {
244 caltinay 4334 myN0 = m_NN[0];
245     myN1 = m_NN[1];
246     myN2 = m_NN[2];
247 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
248     out.getFunctionSpace().getTypeCode() == ReducedElements) {
249 caltinay 4334 myN0 = m_NE[0];
250     myN1 = m_NE[1];
251     myN2 = m_NE[2];
252 caltinay 4013 } else
253     throw RipleyException("readNcGrid(): invalid function space for output data object");
254    
255 caltinay 4615 if (params.first.size() != 3)
256 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
257    
258 caltinay 4615 if (params.numValues.size() != 3)
259 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
260    
261 caltinay 4615 if (params.multiplier.size() != 3)
262 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
263 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
264     if (params.multiplier[i]<1)
265 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
266    
267 caltinay 4013 // check file existence and size
268     NcFile f(filename.c_str(), NcFile::ReadOnly);
269     if (!f.is_valid())
270     throw RipleyException("readNcGrid(): cannot open file");
271    
272     NcVar* var = f.get_var(varname.c_str());
273     if (!var)
274     throw RipleyException("readNcGrid(): invalid variable name");
275    
276     // TODO: rank>0 data support
277     const int numComp = out.getDataPointSize();
278     if (numComp > 1)
279     throw RipleyException("readNcGrid(): only scalar data supported");
280    
281     const int dims = var->num_dims();
282 caltinay 4477 boost::scoped_array<long> edges(var->edges());
283 caltinay 4013
284     // is this a slice of the data object (dims!=3)?
285     // note the expected ordering of edges (as in numpy: z,y,x)
286 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
287     params.numValues[1] > edges[1] ||
288     params.numValues[0] > edges[2]))
289     || (dims==2 && params.numValues[2]>1)
290     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
291 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
292     }
293    
294     // check if this rank contributes anything
295 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
296     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
297     params.first[1] >= m_offset[1]+myN1 ||
298     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
299     params.first[2] >= m_offset[2]+myN2 ||
300     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
301 caltinay 4013 return;
302     }
303    
304     // now determine how much this rank has to write
305    
306     // first coordinates in data object to write to
307 caltinay 5122 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
308     const dim_t first1 = max(0, params.first[1]-m_offset[1]);
309     const dim_t first2 = max(0, params.first[2]-m_offset[2]);
310 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
311 caltinay 5122 dim_t idx0 = max(0, m_offset[0]-params.first[0]);
312     dim_t idx1 = max(0, m_offset[1]-params.first[1]);
313     dim_t idx2 = max(0, m_offset[2]-params.first[2]);
314 caltinay 4277 // number of values to read
315 caltinay 5122 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
316     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
317     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
318 caltinay 4013
319 caltinay 4618 // make sure we read the right block if going backwards through file
320     if (params.reverse[0])
321     idx0 = edges[dims-1]-num0-idx0;
322     if (dims>1 && params.reverse[1])
323     idx1 = edges[dims-2]-num1-idx1;
324     if (dims>2 && params.reverse[2])
325     idx2 = edges[dims-3]-num2-idx2;
326    
327    
328 caltinay 4013 vector<double> values(num0*num1*num2);
329     if (dims==3) {
330     var->set_cur(idx2, idx1, idx0);
331     var->get(&values[0], num2, num1, num0);
332     } else if (dims==2) {
333     var->set_cur(idx1, idx0);
334     var->get(&values[0], num1, num0);
335     } else {
336     var->set_cur(idx0);
337     var->get(&values[0], num0);
338     }
339    
340     const int dpp = out.getNumDataPointsPerSample();
341     out.requireWrite();
342    
343 caltinay 4618 // helpers for reversing
344 caltinay 5122 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
345 caltinay 4618 const int x_mult = (params.reverse[0] ? -1 : 1);
346 caltinay 5122 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
347 caltinay 4618 const int y_mult = (params.reverse[1] ? -1 : 1);
348 caltinay 5122 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
349 caltinay 4618 const int z_mult = (params.reverse[2] ? -1 : 1);
350    
351 caltinay 4013 for (index_t z=0; z<num2; z++) {
352     for (index_t y=0; y<num1; y++) {
353     #pragma omp parallel for
354     for (index_t x=0; x<num0; x++) {
355 caltinay 5122 const dim_t baseIndex = first0+x*params.multiplier[0]
356 caltinay 4615 +(first1+y*params.multiplier[1])*myN0
357     +(first2+z*params.multiplier[2])*myN0*myN1;
358 caltinay 5122 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
359 caltinay 4618 +(y0+y_mult*y)*num0
360     +(x0+x_mult*x);
361 caltinay 4174 if (!isnan(values[srcIndex])) {
362 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
363     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
364     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
365 caltinay 5122 const dim_t dataIndex = baseIndex+m0
366 caltinay 4277 +m1*myN0
367     +m2*myN0*myN1;
368     double* dest = out.getSampleDataRW(dataIndex);
369     for (index_t q=0; q<dpp; q++) {
370     *dest++ = values[srcIndex];
371     }
372     }
373     }
374 caltinay 4174 }
375 caltinay 4013 }
376     }
377     }
378     }
379     #else
380     throw RipleyException("readNcGrid(): not compiled with netCDF support");
381     #endif
382     }
383    
384 sshaw 4738 #ifdef USE_BOOSTIO
385     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
386     const ReaderParameters& params) const
387     {
388     // the mapping is not universally correct but should work on our
389     // supported platforms
390     switch (params.dataType) {
391     case DATATYPE_INT32:
392     readBinaryGridZippedImpl<int>(out, filename, params);
393     break;
394     case DATATYPE_FLOAT32:
395     readBinaryGridZippedImpl<float>(out, filename, params);
396     break;
397     case DATATYPE_FLOAT64:
398     readBinaryGridZippedImpl<double>(out, filename, params);
399     break;
400     default:
401     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
402     }
403     }
404     #endif
405    
406 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
407 caltinay 4618 const ReaderParameters& params) const
408 caltinay 4334 {
409 caltinay 4495 // the mapping is not universally correct but should work on our
410     // supported platforms
411 caltinay 4615 switch (params.dataType) {
412 caltinay 4495 case DATATYPE_INT32:
413 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
414 caltinay 4495 break;
415     case DATATYPE_FLOAT32:
416 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
417 caltinay 4495 break;
418     case DATATYPE_FLOAT64:
419 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
420 caltinay 4495 break;
421     default:
422     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
423     }
424     }
425    
426     template<typename ValueType>
427     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
428 caltinay 4618 const ReaderParameters& params) const
429 caltinay 4495 {
430 caltinay 4334 // check destination function space
431 caltinay 5122 dim_t myN0, myN1, myN2;
432 caltinay 4334 if (out.getFunctionSpace().getTypeCode() == Nodes) {
433     myN0 = m_NN[0];
434     myN1 = m_NN[1];
435     myN2 = m_NN[2];
436     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
437     out.getFunctionSpace().getTypeCode() == ReducedElements) {
438     myN0 = m_NE[0];
439     myN1 = m_NE[1];
440     myN2 = m_NE[2];
441     } else
442     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
443    
444 caltinay 4615 if (params.first.size() != 3)
445 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
446    
447 caltinay 4615 if (params.numValues.size() != 3)
448 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
449    
450 caltinay 4615 if (params.multiplier.size() != 3)
451 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
452 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
453     if (params.multiplier[i]<1)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
455 caltinay 4988 if (params.reverse[0] != 0 || params.reverse[1] != 0)
456     throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
457 caltinay 4334
458     // check file existence and size
459     ifstream f(filename.c_str(), ifstream::binary);
460     if (f.fail()) {
461     throw RipleyException("readBinaryGrid(): cannot open file");
462     }
463     f.seekg(0, ios::end);
464     const int numComp = out.getDataPointSize();
465 caltinay 5122 const dim_t filesize = f.tellg();
466     const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
467 caltinay 4334 if (filesize < reqsize) {
468     f.close();
469     throw RipleyException("readBinaryGrid(): not enough data in file");
470     }
471    
472     // check if this rank contributes anything
473 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
474     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
475     params.first[1] >= m_offset[1]+myN1 ||
476     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
477     params.first[2] >= m_offset[2]+myN2 ||
478     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
479 caltinay 4334 f.close();
480     return;
481     }
482    
483     // now determine how much this rank has to write
484    
485     // first coordinates in data object to write to
486 caltinay 5122 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
487     const dim_t first1 = max(0, params.first[1]-m_offset[1]);
488     const dim_t first2 = max(0, params.first[2]-m_offset[2]);
489 caltinay 4988 // indices to first value in file (not accounting for reverse yet)
490 caltinay 5122 dim_t idx0 = max(0, (m_offset[0]/params.multiplier[0])-params.first[0]);
491     dim_t idx1 = max(0, (m_offset[1]/params.multiplier[1])-params.first[1]);
492     dim_t idx2 = max(0, (m_offset[2]/params.multiplier[2])-params.first[2]);
493     // if restX > 0 the first value in the respective dimension has been
494     // written restX times already in a previous rank so this rank only
495     // contributes (multiplier-rank) copies of that value
496     const dim_t rest0 = m_offset[0]%params.multiplier[0];
497     const dim_t rest1 = m_offset[1]%params.multiplier[1];
498     const dim_t rest2 = m_offset[2]%params.multiplier[2];
499    
500 caltinay 4334 // number of values to read
501 caltinay 5122 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
502     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
503     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
504 caltinay 4334
505 caltinay 4988 // make sure we read the right block if going backwards through file
506     if (params.reverse[2])
507     idx2 = params.numValues[2]-idx2-1;
508    
509     // helpers for reversing
510     const int z_mult = (params.reverse[2] ? -1 : 1);
511    
512 caltinay 4334 out.requireWrite();
513 caltinay 4495 vector<ValueType> values(num0*numComp);
514 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
515    
516 caltinay 5122 for (dim_t z=0; z<num2; z++) {
517     const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
518     dim_t dataZbase = first2 + z*params.multiplier[2];
519     if (z>0)
520     dataZbase -= rest2;
521    
522     for (dim_t y=0; y<num1; y++) {
523     const dim_t fileofs = numComp*(idx0 +
524 caltinay 4988 (idx1+y)*params.numValues[0] +
525     (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
526 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
527     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
528 caltinay 5122 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
529     dim_t dataYbase = first1 + y*params.multiplier[1];
530     if (y>0)
531     dataYbase -= rest1;
532 caltinay 4334
533 caltinay 5122 for (dim_t x=0; x<num0; x++) {
534     const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
535     dim_t dataXbase = first0 + x*params.multiplier[0];
536     if (x>0)
537     dataXbase -= rest0;
538     // write a block of mult0 x mult1 x mult2 identical values into
539     // Data object
540     for (dim_t m2=0; m2 < m2limit; m2++) {
541     const dim_t dataZ = dataZbase + m2;
542     if (dataZ >= myN2)
543     break;
544     for (dim_t m1=0; m1 < m1limit; m1++) {
545     const dim_t dataY = dataYbase + m1;
546     if (dataY >= myN1)
547     break;
548     for (dim_t m0=0; m0 < m0limit; m0++) {
549     const dim_t dataX = dataXbase + m0;
550     if (dataX >= myN0)
551     break;
552     const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
553 caltinay 4334 double* dest = out.getSampleDataRW(dataIndex);
554 caltinay 4529 for (int c=0; c<numComp; c++) {
555     ValueType val = values[x*numComp+c];
556    
557 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
558 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
559     // this will alter val!!
560 caltinay 5084 if (sizeof(ValueType)>4) {
561     byte_swap64(cval);
562     } else {
563     byte_swap32(cval);
564     }
565 caltinay 4529 }
566 caltinay 5084 if (!isnan(val)) {
567 caltinay 4529 for (int q=0; q<dpp; q++) {
568     *dest++ = static_cast<double>(val);
569 caltinay 4334 }
570     }
571     }
572     }
573     }
574     }
575     }
576     }
577     }
578    
579     f.close();
580     }
581    
582 sshaw 4738 #ifdef USE_BOOSTIO
583     template<typename ValueType>
584     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
585     const ReaderParameters& params) const
586     {
587     // check destination function space
588 caltinay 5122 dim_t myN0, myN1, myN2;
589 sshaw 4738 if (out.getFunctionSpace().getTypeCode() == Nodes) {
590     myN0 = m_NN[0];
591     myN1 = m_NN[1];
592     myN2 = m_NN[2];
593     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
594     out.getFunctionSpace().getTypeCode() == ReducedElements) {
595     myN0 = m_NE[0];
596     myN1 = m_NE[1];
597     myN2 = m_NE[2];
598     } else
599     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
600    
601     if (params.first.size() != 3)
602     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
603    
604     if (params.numValues.size() != 3)
605     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
606    
607     if (params.multiplier.size() != 3)
608     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
609     for (size_t i=0; i<params.multiplier.size(); i++)
610     if (params.multiplier[i]<1)
611     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
612    
613     // check file existence and size
614     ifstream f(filename.c_str(), ifstream::binary);
615     if (f.fail()) {
616     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
617     }
618     f.seekg(0, ios::end);
619     const int numComp = out.getDataPointSize();
620 caltinay 5122 dim_t filesize = f.tellg();
621 sshaw 4738 f.seekg(0, ios::beg);
622 caltinay 5122 vector<char> compressed(filesize);
623 sshaw 4738 f.read((char*)&compressed[0], filesize);
624     f.close();
625 caltinay 5122 vector<char> decompressed = unzip(compressed);
626 sshaw 4738 filesize = decompressed.size();
627 caltinay 5122 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
628 sshaw 4738 if (filesize < reqsize) {
629     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
630     }
631    
632     // check if this rank contributes anything
633     if (params.first[0] >= m_offset[0]+myN0 ||
634     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
635     params.first[1] >= m_offset[1]+myN1 ||
636     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
637     params.first[2] >= m_offset[2]+myN2 ||
638     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
639     return;
640     }
641    
642     // now determine how much this rank has to write
643    
644     // first coordinates in data object to write to
645 caltinay 5122 const dim_t first0 = max(0, params.first[0]-m_offset[0]);
646     const dim_t first1 = max(0, params.first[1]-m_offset[1]);
647     const dim_t first2 = max(0, params.first[2]-m_offset[2]);
648 sshaw 4738 // indices to first value in file
649 caltinay 5122 const dim_t idx0 = max(0, m_offset[0]-params.first[0]);
650     const dim_t idx1 = max(0, m_offset[1]-params.first[1]);
651     const dim_t idx2 = max(0, m_offset[2]-params.first[2]);
652 sshaw 4738 // number of values to read
653 caltinay 5122 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
654     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
655     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
656 sshaw 4738
657     out.requireWrite();
658     vector<ValueType> values(num0*numComp);
659     const int dpp = out.getNumDataPointsPerSample();
660    
661 caltinay 5122 for (dim_t z=0; z<num2; z++) {
662     for (dim_t y=0; y<num1; y++) {
663     const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
664 sshaw 4738 +(idx2+z)*params.numValues[0]*params.numValues[1]);
665     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
666 caltinay 5122
667     for (dim_t x=0; x<num0; x++) {
668     const dim_t baseIndex = first0+x*params.multiplier[0]
669 sshaw 4738 +(first1+y*params.multiplier[1])*myN0
670     +(first2+z*params.multiplier[2])*myN0*myN1;
671 caltinay 5122 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
672     for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
673     for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
674     const dim_t dataIndex = baseIndex+m0
675 sshaw 4738 +m1*myN0
676     +m2*myN0*myN1;
677     double* dest = out.getSampleDataRW(dataIndex);
678     for (int c=0; c<numComp; c++) {
679     ValueType val = values[x*numComp+c];
680    
681     if (params.byteOrder != BYTEORDER_NATIVE) {
682     char* cval = reinterpret_cast<char*>(&val);
683     // this will alter val!!
684     byte_swap32(cval);
685     }
686 caltinay 5084 if (!isnan(val)) {
687 sshaw 4738 for (int q=0; q<dpp; q++) {
688     *dest++ = static_cast<double>(val);
689     }
690     }
691     }
692     }
693     }
694     }
695     }
696     }
697     }
698     }
699     #endif
700    
701 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
702     int byteOrder, int dataType) const
703 caltinay 4334 {
704 caltinay 4357 // the mapping is not universally correct but should work on our
705     // supported platforms
706     switch (dataType) {
707     case DATATYPE_INT32:
708     writeBinaryGridImpl<int>(in, filename, byteOrder);
709     break;
710     case DATATYPE_FLOAT32:
711     writeBinaryGridImpl<float>(in, filename, byteOrder);
712     break;
713     case DATATYPE_FLOAT64:
714     writeBinaryGridImpl<double>(in, filename, byteOrder);
715     break;
716     default:
717     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
718     }
719     }
720    
721     template<typename ValueType>
722     void Brick::writeBinaryGridImpl(const escript::Data& in,
723     const string& filename, int byteOrder) const
724     {
725 caltinay 4334 // check function space and determine number of points
726 caltinay 5122 dim_t myN0, myN1, myN2;
727     dim_t totalN0, totalN1, totalN2;
728 caltinay 4334 if (in.getFunctionSpace().getTypeCode() == Nodes) {
729     myN0 = m_NN[0];
730     myN1 = m_NN[1];
731     myN2 = m_NN[2];
732     totalN0 = m_gNE[0]+1;
733     totalN1 = m_gNE[1]+1;
734     totalN2 = m_gNE[2]+1;
735     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
736     in.getFunctionSpace().getTypeCode() == ReducedElements) {
737     myN0 = m_NE[0];
738     myN1 = m_NE[1];
739     myN2 = m_NE[2];
740     totalN0 = m_gNE[0];
741     totalN1 = m_gNE[1];
742     totalN2 = m_gNE[2];
743     } else
744     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
745    
746     const int numComp = in.getDataPointSize();
747     const int dpp = in.getNumDataPointsPerSample();
748 caltinay 5122 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
749 caltinay 4334
750     if (numComp > 1 || dpp > 1)
751     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
752    
753     // from here on we know that each sample consists of one value
754 caltinay 4482 FileWriter fw;
755     fw.openFile(filename, fileSize);
756 caltinay 4334 MPIBarrier();
757    
758     for (index_t z=0; z<myN2; z++) {
759     for (index_t y=0; y<myN1; y++) {
760 caltinay 5122 const dim_t fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
761 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
762 caltinay 4334 ostringstream oss;
763    
764     for (index_t x=0; x<myN0; x++) {
765 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
766 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
767     if (byteOrder == BYTEORDER_NATIVE) {
768 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
769     } else {
770     char* value = reinterpret_cast<char*>(&fvalue);
771 caltinay 5084 if (sizeof(fvalue)>4) {
772     byte_swap64(value);
773     } else {
774     byte_swap32(value);
775     }
776     oss.write(value, sizeof(fvalue));
777 caltinay 4334 }
778     }
779 caltinay 4482 fw.writeAt(oss, fileofs);
780 caltinay 4334 }
781     }
782 caltinay 4482 fw.close();
783 caltinay 4334 }
784    
785 caltinay 5122 void Brick::write(const std::string& filename) const
786     {
787     throw RipleyException("write: not supported");
788     }
789    
790 caltinay 3691 void Brick::dump(const string& fileName) const
791     {
792     #if USE_SILO
793     string fn(fileName);
794     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
795     fn+=".silo";
796     }
797    
798 caltinay 5122 int driver=DB_HDF5;
799 caltinay 3691 string siloPath;
800     DBfile* dbfile = NULL;
801    
802     #ifdef ESYS_MPI
803     PMPIO_baton_t* baton = NULL;
804 gross 3793 const int NUM_SILO_FILES = 1;
805     const char* blockDirFmt = "/block%04d";
806 caltinay 3691 #endif
807    
808     if (m_mpiInfo->size > 1) {
809     #ifdef ESYS_MPI
810     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
811     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
812     PMPIO_DefaultClose, (void*)&driver);
813     // try the fallback driver in case of error
814     if (!baton && driver != DB_PDB) {
815     driver = DB_PDB;
816     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
817     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
818     PMPIO_DefaultClose, (void*)&driver);
819     }
820     if (baton) {
821     char str[64];
822     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
823     siloPath = str;
824     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
825     }
826     #endif
827     } else {
828     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
829     getDescription().c_str(), driver);
830     // try the fallback driver in case of error
831     if (!dbfile && driver != DB_PDB) {
832     driver = DB_PDB;
833     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
834     getDescription().c_str(), driver);
835     }
836     }
837    
838     if (!dbfile)
839     throw RipleyException("dump: Could not create Silo file");
840    
841     /*
842     if (driver==DB_HDF5) {
843     // gzip level 1 already provides good compression with minimal
844     // performance penalty. Some tests showed that gzip levels >3 performed
845     // rather badly on escript data both in terms of time and space
846     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
847     }
848     */
849    
850 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
851     boost::scoped_ptr<double> y(new double[m_NN[1]]);
852     boost::scoped_ptr<double> z(new double[m_NN[2]]);
853 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
854 caltinay 5122 const dim_t NN0 = m_NN[0];
855     const dim_t NN1 = m_NN[1];
856     const dim_t NN2 = m_NN[2];
857 caltinay 5084
858 caltinay 3691 #pragma omp parallel
859     {
860     #pragma omp for
861 caltinay 5084 for (dim_t i0 = 0; i0 < NN0; i0++) {
862 caltinay 4334 coords[0][i0]=getLocalCoordinate(i0, 0);
863 caltinay 3691 }
864     #pragma omp for
865 caltinay 5084 for (dim_t i1 = 0; i1 < NN1; i1++) {
866 caltinay 4334 coords[1][i1]=getLocalCoordinate(i1, 1);
867 caltinay 3691 }
868     #pragma omp for
869 caltinay 5084 for (dim_t i2 = 0; i2 < NN2; i2++) {
870 caltinay 4334 coords[2][i2]=getLocalCoordinate(i2, 2);
871 caltinay 3691 }
872     }
873 caltinay 5122 dim_t* dims = const_cast<dim_t*>(getNumNodesPerDim());
874 caltinay 4334
875     // write mesh
876     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
877 caltinay 3691 DB_COLLINEAR, NULL);
878    
879 caltinay 4334 // write node ids
880     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
881 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
882 caltinay 3691
883 caltinay 3698 // write element ids
884 caltinay 5122 dims = const_cast<dim_t*>(getNumElementsPerDim());
885 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
886 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
887 caltinay 3698
888     // rank 0 writes multimesh and multivar
889 caltinay 3691 if (m_mpiInfo->rank == 0) {
890     vector<string> tempstrings;
891     vector<char*> names;
892     for (dim_t i=0; i<m_mpiInfo->size; i++) {
893     stringstream path;
894     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
895     tempstrings.push_back(path.str());
896     names.push_back((char*)tempstrings.back().c_str());
897     }
898     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
899     DBSetDir(dbfile, "/");
900     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
901     &types[0], NULL);
902     tempstrings.clear();
903     names.clear();
904     for (dim_t i=0; i<m_mpiInfo->size; i++) {
905     stringstream path;
906     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
907     tempstrings.push_back(path.str());
908     names.push_back((char*)tempstrings.back().c_str());
909     }
910     types.assign(m_mpiInfo->size, DB_QUADVAR);
911     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
912     &types[0], NULL);
913 caltinay 3698 tempstrings.clear();
914     names.clear();
915     for (dim_t i=0; i<m_mpiInfo->size; i++) {
916     stringstream path;
917     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
918     tempstrings.push_back(path.str());
919     names.push_back((char*)tempstrings.back().c_str());
920     }
921     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
922     &types[0], NULL);
923 caltinay 3691 }
924    
925     if (m_mpiInfo->size > 1) {
926     #ifdef ESYS_MPI
927     PMPIO_HandOffBaton(baton, dbfile);
928     PMPIO_Finish(baton);
929     #endif
930     } else {
931     DBClose(dbfile);
932     }
933    
934     #else // USE_SILO
935 caltinay 3791 throw RipleyException("dump: no Silo support");
936 caltinay 3691 #endif
937     }
938    
939 caltinay 5122 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
940 caltinay 3691 {
941 caltinay 3697 switch (fsType) {
942     case Nodes:
943 caltinay 3748 case ReducedNodes: //FIXME: reduced
944 caltinay 3753 return &m_nodeId[0];
945 caltinay 3757 case DegreesOfFreedom:
946     case ReducedDegreesOfFreedom: //FIXME: reduced
947 caltinay 3753 return &m_dofId[0];
948 caltinay 3697 case Elements:
949 caltinay 3733 case ReducedElements:
950 caltinay 3697 return &m_elementId[0];
951 caltinay 3757 case FaceElements:
952 caltinay 3733 case ReducedFaceElements:
953 caltinay 3697 return &m_faceId[0];
954 sshaw 4660 case Points:
955     return &m_diracPointNodeIDs[0];
956 caltinay 3697 default:
957     break;
958     }
959 caltinay 3691
960 caltinay 3697 stringstream msg;
961 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
962 caltinay 3697 throw RipleyException(msg.str());
963 caltinay 3691 }
964    
965 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
966 caltinay 3691 {
967 caltinay 3759 if (getMPISize()==1)
968     return true;
969    
970 caltinay 3757 switch (fsType) {
971     case Nodes:
972     case ReducedNodes: //FIXME: reduced
973     return (m_dofMap[id] < getNumDOF());
974     case DegreesOfFreedom:
975     case ReducedDegreesOfFreedom:
976     return true;
977     case Elements:
978     case ReducedElements:
979     {
980     // check ownership of element's _last_ node
981 caltinay 4334 const index_t x=id%m_NE[0] + 1;
982     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
983     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
984     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
985 caltinay 3757 }
986     case FaceElements:
987     case ReducedFaceElements:
988 caltinay 3759 {
989     // check ownership of face element's last node
990     dim_t n=0;
991 caltinay 4334 for (size_t i=0; i<6; i++) {
992     n+=m_faceCount[i];
993 caltinay 3759 if (id<n) {
994 caltinay 4334 const index_t j=id-n+m_faceCount[i];
995 caltinay 3759 if (i>=4) { // front or back
996 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
997     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
998 caltinay 3759 } else if (i>=2) { // bottom or top
999 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1000     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1001 caltinay 3759 } else { // left or right
1002 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1003     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1004 caltinay 3759 }
1005     }
1006     }
1007     return false;
1008     }
1009 caltinay 3757 default:
1010     break;
1011 caltinay 3753 }
1012 caltinay 3757
1013     stringstream msg;
1014 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
1015 caltinay 3757 throw RipleyException(msg.str());
1016 caltinay 3691 }
1017    
1018 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
1019 caltinay 3703 {
1020 caltinay 5122 const dim_t NE0 = m_NE[0];
1021     const dim_t NE1 = m_NE[1];
1022     const dim_t NE2 = m_NE[2];
1023 caltinay 5084
1024 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1025     out.requireWrite();
1026     #pragma omp parallel
1027     {
1028     if (m_faceOffset[0] > -1) {
1029     #pragma omp for nowait
1030 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1031     for (index_t k1 = 0; k1 < NE1; ++k1) {
1032 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1033 caltinay 3764 // set vector at four quadrature points
1034     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1035     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1036     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1037     *o++ = -1.; *o++ = 0.; *o = 0.;
1038     }
1039     }
1040     }
1041    
1042     if (m_faceOffset[1] > -1) {
1043     #pragma omp for nowait
1044 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1045     for (index_t k1 = 0; k1 < NE1; ++k1) {
1046 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1047 caltinay 3764 // set vector at four quadrature points
1048     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1049     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1050     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1051     *o++ = 1.; *o++ = 0.; *o = 0.;
1052     }
1053     }
1054     }
1055    
1056     if (m_faceOffset[2] > -1) {
1057     #pragma omp for nowait
1058 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1059     for (index_t k0 = 0; k0 < NE0; ++k0) {
1060 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1061 caltinay 3764 // set vector at four quadrature points
1062     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1063     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1064     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1065     *o++ = 0.; *o++ = -1.; *o = 0.;
1066     }
1067     }
1068     }
1069    
1070     if (m_faceOffset[3] > -1) {
1071     #pragma omp for nowait
1072 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1073     for (index_t k0 = 0; k0 < NE0; ++k0) {
1074 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1075 caltinay 3764 // set vector at four quadrature points
1076     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1077     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1078     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1079     *o++ = 0.; *o++ = 1.; *o = 0.;
1080     }
1081     }
1082     }
1083    
1084     if (m_faceOffset[4] > -1) {
1085     #pragma omp for nowait
1086 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1087     for (index_t k0 = 0; k0 < NE0; ++k0) {
1088 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1089 caltinay 3764 // set vector at four quadrature points
1090     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1091     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1092     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1093     *o++ = 0.; *o++ = 0.; *o = -1.;
1094     }
1095     }
1096     }
1097    
1098     if (m_faceOffset[5] > -1) {
1099     #pragma omp for nowait
1100 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1101     for (index_t k0 = 0; k0 < NE0; ++k0) {
1102 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1103 caltinay 3764 // set vector at four quadrature points
1104     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1105     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1106     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1107     *o++ = 0.; *o++ = 0.; *o = 1.;
1108     }
1109     }
1110     }
1111     } // end of parallel section
1112     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1113     out.requireWrite();
1114     #pragma omp parallel
1115     {
1116     if (m_faceOffset[0] > -1) {
1117     #pragma omp for nowait
1118 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1119     for (index_t k1 = 0; k1 < NE1; ++k1) {
1120 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1121 caltinay 3764 *o++ = -1.;
1122     *o++ = 0.;
1123     *o = 0.;
1124     }
1125     }
1126     }
1127    
1128     if (m_faceOffset[1] > -1) {
1129     #pragma omp for nowait
1130 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1131     for (index_t k1 = 0; k1 < NE1; ++k1) {
1132 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1133 caltinay 3764 *o++ = 1.;
1134     *o++ = 0.;
1135     *o = 0.;
1136     }
1137     }
1138     }
1139    
1140     if (m_faceOffset[2] > -1) {
1141     #pragma omp for nowait
1142 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1143     for (index_t k0 = 0; k0 < NE0; ++k0) {
1144 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1145 caltinay 3764 *o++ = 0.;
1146     *o++ = -1.;
1147     *o = 0.;
1148     }
1149     }
1150     }
1151    
1152     if (m_faceOffset[3] > -1) {
1153     #pragma omp for nowait
1154 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1155     for (index_t k0 = 0; k0 < NE0; ++k0) {
1156 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1157 caltinay 3764 *o++ = 0.;
1158     *o++ = 1.;
1159     *o = 0.;
1160     }
1161     }
1162     }
1163    
1164     if (m_faceOffset[4] > -1) {
1165     #pragma omp for nowait
1166 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1167     for (index_t k0 = 0; k0 < NE0; ++k0) {
1168 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1169 caltinay 3764 *o++ = 0.;
1170     *o++ = 0.;
1171     *o = -1.;
1172     }
1173     }
1174     }
1175    
1176     if (m_faceOffset[5] > -1) {
1177     #pragma omp for nowait
1178 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1179     for (index_t k0 = 0; k0 < NE0; ++k0) {
1180 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1181 caltinay 3764 *o++ = 0.;
1182     *o++ = 0.;
1183     *o = 1.;
1184     }
1185     }
1186     }
1187     } // end of parallel section
1188    
1189     } else {
1190     stringstream msg;
1191 caltinay 3791 msg << "setToNormal: invalid function space type "
1192     << out.getFunctionSpace().getTypeCode();
1193 caltinay 3764 throw RipleyException(msg.str());
1194     }
1195     }
1196    
1197     void Brick::setToSize(escript::Data& out) const
1198     {
1199     if (out.getFunctionSpace().getTypeCode() == Elements
1200     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1201     out.requireWrite();
1202     const dim_t numQuad=out.getNumDataPointsPerSample();
1203 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1204 caltinay 5122 const dim_t NE = getNumElements();
1205 caltinay 3764 #pragma omp parallel for
1206 caltinay 5084 for (index_t k = 0; k < NE; ++k) {
1207 caltinay 3764 double* o = out.getSampleDataRW(k);
1208     fill(o, o+numQuad, size);
1209     }
1210     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1211     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1212     out.requireWrite();
1213     const dim_t numQuad=out.getNumDataPointsPerSample();
1214 caltinay 5122 const dim_t NE0 = m_NE[0];
1215     const dim_t NE1 = m_NE[1];
1216     const dim_t NE2 = m_NE[2];
1217 caltinay 3764 #pragma omp parallel
1218     {
1219     if (m_faceOffset[0] > -1) {
1220 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1221 caltinay 3764 #pragma omp for nowait
1222 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1223     for (index_t k1 = 0; k1 < NE1; ++k1) {
1224 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1225 caltinay 3764 fill(o, o+numQuad, size);
1226     }
1227     }
1228     }
1229    
1230     if (m_faceOffset[1] > -1) {
1231 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1232 caltinay 3764 #pragma omp for nowait
1233 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1234     for (index_t k1 = 0; k1 < NE1; ++k1) {
1235 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1236 caltinay 3764 fill(o, o+numQuad, size);
1237     }
1238     }
1239     }
1240    
1241     if (m_faceOffset[2] > -1) {
1242 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1243 caltinay 3764 #pragma omp for nowait
1244 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1245     for (index_t k0 = 0; k0 < NE0; ++k0) {
1246 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1247 caltinay 3764 fill(o, o+numQuad, size);
1248     }
1249     }
1250     }
1251    
1252     if (m_faceOffset[3] > -1) {
1253 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1254 caltinay 3764 #pragma omp for nowait
1255 caltinay 5084 for (index_t k2 = 0; k2 < NE2; ++k2) {
1256     for (index_t k0 = 0; k0 < NE0; ++k0) {
1257 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1258 caltinay 3764 fill(o, o+numQuad, size);
1259     }
1260     }
1261     }
1262    
1263     if (m_faceOffset[4] > -1) {
1264 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1265 caltinay 3764 #pragma omp for nowait
1266 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1267     for (index_t k0 = 0; k0 < NE0; ++k0) {
1268 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1269 caltinay 3764 fill(o, o+numQuad, size);
1270     }
1271     }
1272     }
1273    
1274     if (m_faceOffset[5] > -1) {
1275 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1276 caltinay 3764 #pragma omp for nowait
1277 caltinay 5084 for (index_t k1 = 0; k1 < NE1; ++k1) {
1278     for (index_t k0 = 0; k0 < NE0; ++k0) {
1279 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1280 caltinay 3764 fill(o, o+numQuad, size);
1281     }
1282     }
1283     }
1284     } // end of parallel section
1285    
1286     } else {
1287     stringstream msg;
1288 caltinay 3791 msg << "setToSize: invalid function space type "
1289     << out.getFunctionSpace().getTypeCode();
1290 caltinay 3764 throw RipleyException(msg.str());
1291     }
1292     }
1293    
1294     void Brick::Print_Mesh_Info(const bool full) const
1295     {
1296     RipleyDomain::Print_Mesh_Info(full);
1297     if (full) {
1298     cout << " Id Coordinates" << endl;
1299     cout.precision(15);
1300     cout.setf(ios::scientific, ios::floatfield);
1301     for (index_t i=0; i < getNumNodes(); i++) {
1302     cout << " " << setw(5) << m_nodeId[i]
1303 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1304     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1305     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1306 caltinay 3764 }
1307     }
1308     }
1309    
1310    
1311     //protected
1312     void Brick::assembleCoordinates(escript::Data& arg) const
1313     {
1314     escriptDataC x = arg.getDataC();
1315     int numDim = m_numDim;
1316     if (!isDataPointShapeEqual(&x, 1, &numDim))
1317     throw RipleyException("setToX: Invalid Data object shape");
1318     if (!numSamplesEqual(&x, 1, getNumNodes()))
1319     throw RipleyException("setToX: Illegal number of samples in Data object");
1320    
1321 caltinay 5122 const dim_t NN0 = m_NN[0];
1322     const dim_t NN1 = m_NN[1];
1323     const dim_t NN2 = m_NN[2];
1324 caltinay 3764 arg.requireWrite();
1325     #pragma omp parallel for
1326 caltinay 5084 for (dim_t i2 = 0; i2 < NN2; i2++) {
1327     for (dim_t i1 = 0; i1 < NN1; i1++) {
1328     for (dim_t i0 = 0; i0 < NN0; i0++) {
1329     double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1330 caltinay 4334 point[0] = getLocalCoordinate(i0, 0);
1331     point[1] = getLocalCoordinate(i1, 1);
1332     point[2] = getLocalCoordinate(i2, 2);
1333 caltinay 3764 }
1334     }
1335     }
1336     }
1337    
1338     //protected
1339 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1340 caltinay 3764 {
1341 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1342 caltinay 3731 const double C0 = .044658198738520451079;
1343     const double C1 = .16666666666666666667;
1344     const double C2 = .21132486540518711775;
1345     const double C3 = .25;
1346     const double C4 = .5;
1347     const double C5 = .62200846792814621559;
1348     const double C6 = .78867513459481288225;
1349 caltinay 5122 const dim_t NE0 = m_NE[0];
1350     const dim_t NE1 = m_NE[1];
1351     const dim_t NE2 = m_NE[2];
1352 caltinay 3731
1353 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1354 caltinay 3760 out.requireWrite();
1355 caltinay 3913 #pragma omp parallel
1356     {
1357     vector<double> f_000(numComp);
1358     vector<double> f_001(numComp);
1359     vector<double> f_010(numComp);
1360     vector<double> f_011(numComp);
1361     vector<double> f_100(numComp);
1362     vector<double> f_101(numComp);
1363     vector<double> f_110(numComp);
1364     vector<double> f_111(numComp);
1365     #pragma omp for
1366 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1367     for (index_t k1=0; k1 < NE1; ++k1) {
1368     for (index_t k0=0; k0 < NE0; ++k0) {
1369 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1370     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1371     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1372     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1373     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1374     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1375     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1376     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1377 caltinay 5084 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1378 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1379 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1380     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1381     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1382     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1383     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1384     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1385     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1386     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1387     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1388     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1389     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1390     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1391 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1392     o[INDEX3(i,1,0,numComp,3)] = V4;
1393     o[INDEX3(i,2,0,numComp,3)] = V8;
1394     o[INDEX3(i,0,1,numComp,3)] = V0;
1395     o[INDEX3(i,1,1,numComp,3)] = V5;
1396     o[INDEX3(i,2,1,numComp,3)] = V9;
1397     o[INDEX3(i,0,2,numComp,3)] = V1;
1398     o[INDEX3(i,1,2,numComp,3)] = V4;
1399     o[INDEX3(i,2,2,numComp,3)] = V10;
1400     o[INDEX3(i,0,3,numComp,3)] = V1;
1401     o[INDEX3(i,1,3,numComp,3)] = V5;
1402     o[INDEX3(i,2,3,numComp,3)] = V11;
1403     o[INDEX3(i,0,4,numComp,3)] = V2;
1404     o[INDEX3(i,1,4,numComp,3)] = V6;
1405     o[INDEX3(i,2,4,numComp,3)] = V8;
1406     o[INDEX3(i,0,5,numComp,3)] = V2;
1407     o[INDEX3(i,1,5,numComp,3)] = V7;
1408     o[INDEX3(i,2,5,numComp,3)] = V9;
1409     o[INDEX3(i,0,6,numComp,3)] = V3;
1410     o[INDEX3(i,1,6,numComp,3)] = V6;
1411     o[INDEX3(i,2,6,numComp,3)] = V10;
1412     o[INDEX3(i,0,7,numComp,3)] = V3;
1413     o[INDEX3(i,1,7,numComp,3)] = V7;
1414     o[INDEX3(i,2,7,numComp,3)] = V11;
1415     } // end of component loop i
1416     } // end of k0 loop
1417     } // end of k1 loop
1418     } // end of k2 loop
1419     } // end of parallel section
1420 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1421 caltinay 3760 out.requireWrite();
1422 caltinay 3913 #pragma omp parallel
1423     {
1424     vector<double> f_000(numComp);
1425     vector<double> f_001(numComp);
1426     vector<double> f_010(numComp);
1427     vector<double> f_011(numComp);
1428     vector<double> f_100(numComp);
1429     vector<double> f_101(numComp);
1430     vector<double> f_110(numComp);
1431     vector<double> f_111(numComp);
1432     #pragma omp for
1433 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1434     for (index_t k1=0; k1 < NE1; ++k1) {
1435     for (index_t k0=0; k0 < NE0; ++k0) {
1436 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1438     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1439     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1440     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1441     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1442     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1443     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1444 caltinay 5084 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1));
1445 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1446 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1447     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1448     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1449 caltinay 3913 } // end of component loop i
1450     } // end of k0 loop
1451     } // end of k1 loop
1452     } // end of k2 loop
1453     } // end of parallel section
1454 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1455 caltinay 3760 out.requireWrite();
1456 caltinay 3722 #pragma omp parallel
1457     {
1458 caltinay 3913 vector<double> f_000(numComp);
1459     vector<double> f_001(numComp);
1460     vector<double> f_010(numComp);
1461     vector<double> f_011(numComp);
1462     vector<double> f_100(numComp);
1463     vector<double> f_101(numComp);
1464     vector<double> f_110(numComp);
1465     vector<double> f_111(numComp);
1466 caltinay 3722 if (m_faceOffset[0] > -1) {
1467     #pragma omp for nowait
1468 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1469     for (index_t k1=0; k1 < NE1; ++k1) {
1470 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1));
1479 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1480 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1481     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1482     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1483     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1484     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1485 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1486     o[INDEX3(i,2,0,numComp,3)] = V2;
1487 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1488 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1489     o[INDEX3(i,2,1,numComp,3)] = V3;
1490 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1491 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1492     o[INDEX3(i,2,2,numComp,3)] = V2;
1493 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1494 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1495     o[INDEX3(i,2,3,numComp,3)] = V3;
1496 caltinay 3764 } // end of component loop i
1497     } // end of k1 loop
1498     } // end of k2 loop
1499     } // end of face 0
1500 caltinay 3722 if (m_faceOffset[1] > -1) {
1501     #pragma omp for nowait
1502 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1503     for (index_t k1=0; k1 < NE1; ++k1) {
1504 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,NE1));
1513 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1514 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1515     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1516     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1517     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1518     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1519 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1520     o[INDEX3(i,2,0,numComp,3)] = V2;
1521 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1522 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1523     o[INDEX3(i,2,1,numComp,3)] = V3;
1524 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1525 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1526     o[INDEX3(i,2,2,numComp,3)] = V2;
1527 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1528 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1529     o[INDEX3(i,2,3,numComp,3)] = V3;
1530 caltinay 3764 } // end of component loop i
1531     } // end of k1 loop
1532     } // end of k2 loop
1533     } // end of face 1
1534 caltinay 3722 if (m_faceOffset[2] > -1) {
1535     #pragma omp for nowait
1536 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1537     for (index_t k0=0; k0 < NE0; ++k0) {
1538 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1539     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1540     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1541     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1542     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1543     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1544     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1545     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1546 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,NE0));
1547 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1548 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1549     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1550     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1551 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1552 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1553 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1554     o[INDEX3(i,0,1,numComp,3)] = V0;
1555 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1556 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1557     o[INDEX3(i,0,2,numComp,3)] = V0;
1558 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1559 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1560     o[INDEX3(i,0,3,numComp,3)] = V0;
1561 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1562 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1563 caltinay 3764 } // end of component loop i
1564     } // end of k0 loop
1565     } // end of k2 loop
1566     } // end of face 2
1567 caltinay 3722 if (m_faceOffset[3] > -1) {
1568     #pragma omp for nowait
1569 caltinay 5084 for (index_t k2=0; k2 < NE2; ++k2) {
1570     for (index_t k0=0; k0 < NE0; ++k0) {
1571 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1573     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1574     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1575     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1576     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1577     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1578     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1579 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,NE0));
1580 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1581 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1582     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1583     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1584     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1585 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1586 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1587 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1588     o[INDEX3(i,0,1,numComp,3)] = V0;
1589 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1590 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1591     o[INDEX3(i,0,2,numComp,3)] = V1;
1592 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1593 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1594     o[INDEX3(i,0,3,numComp,3)] = V1;
1595 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1596 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1597 caltinay 3764 } // end of component loop i
1598     } // end of k0 loop
1599     } // end of k2 loop
1600     } // end of face 3
1601 caltinay 3722 if (m_faceOffset[4] > -1) {
1602     #pragma omp for nowait
1603 caltinay 5084 for (index_t k1=0; k1 < NE1; ++k1) {
1604     for (index_t k0=0; k0 < NE0; ++k0) {
1605 caltinay 4334 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1606     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1607     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1608     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1609     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1610     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1611     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1612     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1613 caltinay 5084 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,NE0));
1614 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1615 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1616     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1617     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1618     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1619 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1620     o[INDEX3(i,1,0,numComp,3)] = V2;
1621 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1622 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1623     o[INDEX3(i,1,1,numComp,3)] = V3;
1624 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1625 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1626     o[INDEX3(i,1,2,numComp,3)] = V2;
1627 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1628 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1629     o[INDEX3(i,1,3,numComp,3)] = V3;
1630 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101