/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4816 - (hide annotations)
Fri Mar 28 06:16:02 2014 UTC (5 years, 1 month ago) by caltinay
File size: 165702 byte(s)
paso::SharedComponents now header-only and shared ptr managed.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 sshaw 4712 #include <ripley/LameAssembler3D.h>
23     #include <ripley/domainhelpers.h>
24 caltinay 4477 #include <boost/scoped_array.hpp>
25    
26 caltinay 4013 #ifdef USE_NETCDF
27     #include <netcdfcpp.h>
28     #endif
29    
30 caltinay 3691 #if USE_SILO
31     #include <silo.h>
32     #ifdef ESYS_MPI
33     #include <pmpio.h>
34     #endif
35     #endif
36    
37     #include <iomanip>
38    
39 jfenwick 4650 #include "esysUtils/EsysRandom.h"
40     #include "blocktools.h"
41    
42    
43 caltinay 3691 using namespace std;
44 caltinay 4334 using esysUtils::FileWriter;
45 caltinay 3691
46     namespace ripley {
47    
48 sshaw 4712 int indexOfMax(int a, int b, int c) {
49     if (a > b) {
50     if (c > a) {
51     return 2;
52     }
53     return 0;
54     } else if (b > c) {
55     return 1;
56     }
57     return 2;
58     }
59    
60 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
62     const std::vector<double>& points, const std::vector<int>& tags,
63 jfenwick 4638 const simap_t& tagnamestonums) :
64 caltinay 4334 RipleyDomain(3)
65 caltinay 3691 {
66 caltinay 3943 // ignore subdivision parameters for serial run
67     if (m_mpiInfo->size == 1) {
68     d0=1;
69     d1=1;
70     d2=1;
71     }
72     bool warn=false;
73 sshaw 4712
74     std::vector<int> factors;
75     int ranks = m_mpiInfo->size;
76     int epr[3] = {n0,n1,n2};
77     int d[3] = {d0,d1,d2};
78     if (d0<=0 || d1<=0 || d2<=0) {
79     for (int i = 0; i < 3; i++) {
80     if (d[i] < 1) {
81     d[i] = 1;
82     continue;
83 caltinay 3943 }
84 sshaw 4712 epr[i] = -1; // can no longer be max
85     if (ranks % d[i] != 0) {
86     throw RipleyException("Invalid number of spatial subdivisions");
87 caltinay 3943 }
88 sshaw 4712 //remove
89     ranks /= d[i];
90 caltinay 3943 }
91 sshaw 4712 factorise(factors, ranks);
92     if (factors.size() != 0) {
93     warn = true;
94 caltinay 3943 }
95 sshaw 4712 }
96     while (factors.size() > 0) {
97     int i = indexOfMax(epr[0],epr[1],epr[2]);
98     int f = factors.back();
99     factors.pop_back();
100     d[i] *= f;
101     epr[i] /= f;
102 caltinay 3943 }
103 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
104 caltinay 3943
105 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
106     // among number of ranks
107 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
108 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
109 sshaw 4712 }
110 caltinay 3943 if (warn) {
111     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113     }
114 caltinay 3691
115 caltinay 4334 double l0 = x1-x0;
116     double l1 = y1-y0;
117     double l2 = z1-z0;
118     m_dx[0] = l0/n0;
119     m_dx[1] = l1/n1;
120     m_dx[2] = l2/n2;
121    
122     if ((n0+1)%d0 > 0) {
123 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 caltinay 4334 l0=m_dx[0]*n0;
125 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
126 caltinay 4334 << n0 << ", l0=" << l0 << endl;
127 caltinay 3943 }
128 caltinay 4334 if ((n1+1)%d1 > 0) {
129 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 caltinay 4334 l1=m_dx[1]*n1;
131 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
132 caltinay 4334 << n1 << ", l1=" << l1 << endl;
133 caltinay 3943 }
134 caltinay 4334 if ((n2+1)%d2 > 0) {
135 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 caltinay 4334 l2=m_dx[2]*n2;
137 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
138 caltinay 4334 << n2 << ", l2=" << l2 << endl;
139 caltinay 3943 }
140    
141 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
143    
144 caltinay 4334 m_gNE[0] = n0;
145     m_gNE[1] = n1;
146     m_gNE[2] = n2;
147     m_origin[0] = x0;
148     m_origin[1] = y0;
149     m_origin[2] = z0;
150     m_length[0] = l0;
151     m_length[1] = l1;
152     m_length[2] = l2;
153     m_NX[0] = d0;
154     m_NX[1] = d1;
155     m_NX[2] = d2;
156    
157 caltinay 3753 // local number of elements (including overlap)
158 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160     m_NE[0]++;
161     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162     m_ownNE[0]--;
163 caltinay 3764
164 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166     m_NE[1]++;
167     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168     m_ownNE[1]--;
169 caltinay 3764
170 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172     m_NE[2]++;
173     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174     m_ownNE[2]--;
175 caltinay 3753
176     // local number of nodes
177 caltinay 4334 m_NN[0] = m_NE[0]+1;
178     m_NN[1] = m_NE[1]+1;
179     m_NN[2] = m_NE[2]+1;
180 caltinay 3753
181 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183     if (m_offset[0] > 0)
184     m_offset[0]--;
185     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186     if (m_offset[1] > 0)
187     m_offset[1]--;
188     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189     if (m_offset[2] > 0)
190     m_offset[2]--;
191 caltinay 3753
192 caltinay 3691 populateSampleIds();
193 caltinay 3756 createPattern();
194 sshaw 4622
195     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197     i != tagnamestonums.end(); i++) {
198     setTagMap(i->first, i->second);
199     }
200 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
201 caltinay 3691 }
202    
203    
204     Brick::~Brick()
205     {
206 caltinay 4800 paso::SystemMatrixPattern_free(m_pattern);
207 caltinay 4801 paso::Connector_free(m_connector);
208 sshaw 4622 delete assembler;
209 caltinay 3691 }
210    
211     string Brick::getDescription() const
212     {
213     return "ripley::Brick";
214     }
215    
216     bool Brick::operator==(const AbstractDomain& other) const
217     {
218 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
219     if (o) {
220     return (RipleyDomain::operator==(other) &&
221 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
222     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
223     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
224     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
225 caltinay 3744 }
226 caltinay 3691
227     return false;
228     }
229    
230 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
231 caltinay 4618 const ReaderParameters& params) const
232 caltinay 4013 {
233     #ifdef USE_NETCDF
234     // check destination function space
235     int myN0, myN1, myN2;
236     if (out.getFunctionSpace().getTypeCode() == Nodes) {
237 caltinay 4334 myN0 = m_NN[0];
238     myN1 = m_NN[1];
239     myN2 = m_NN[2];
240 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
241     out.getFunctionSpace().getTypeCode() == ReducedElements) {
242 caltinay 4334 myN0 = m_NE[0];
243     myN1 = m_NE[1];
244     myN2 = m_NE[2];
245 caltinay 4013 } else
246     throw RipleyException("readNcGrid(): invalid function space for output data object");
247    
248 caltinay 4615 if (params.first.size() != 3)
249 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
250    
251 caltinay 4615 if (params.numValues.size() != 3)
252 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
253    
254 caltinay 4615 if (params.multiplier.size() != 3)
255 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
256 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
257     if (params.multiplier[i]<1)
258 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
259    
260 caltinay 4013 // check file existence and size
261     NcFile f(filename.c_str(), NcFile::ReadOnly);
262     if (!f.is_valid())
263     throw RipleyException("readNcGrid(): cannot open file");
264    
265     NcVar* var = f.get_var(varname.c_str());
266     if (!var)
267     throw RipleyException("readNcGrid(): invalid variable name");
268    
269     // TODO: rank>0 data support
270     const int numComp = out.getDataPointSize();
271     if (numComp > 1)
272     throw RipleyException("readNcGrid(): only scalar data supported");
273    
274     const int dims = var->num_dims();
275 caltinay 4477 boost::scoped_array<long> edges(var->edges());
276 caltinay 4013
277     // is this a slice of the data object (dims!=3)?
278     // note the expected ordering of edges (as in numpy: z,y,x)
279 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
280     params.numValues[1] > edges[1] ||
281     params.numValues[0] > edges[2]))
282     || (dims==2 && params.numValues[2]>1)
283     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
284 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
285     }
286    
287     // check if this rank contributes anything
288 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
289     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
290     params.first[1] >= m_offset[1]+myN1 ||
291     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
292     params.first[2] >= m_offset[2]+myN2 ||
293     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
294 caltinay 4013 return;
295     }
296    
297     // now determine how much this rank has to write
298    
299     // first coordinates in data object to write to
300 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
301     const int first1 = max(0, params.first[1]-m_offset[1]);
302     const int first2 = max(0, params.first[2]-m_offset[2]);
303 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
304     int idx0 = max(0, m_offset[0]-params.first[0]);
305     int idx1 = max(0, m_offset[1]-params.first[1]);
306     int idx2 = max(0, m_offset[2]-params.first[2]);
307 caltinay 4277 // number of values to read
308 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
309     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
310     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
311 caltinay 4013
312 caltinay 4618 // make sure we read the right block if going backwards through file
313     if (params.reverse[0])
314     idx0 = edges[dims-1]-num0-idx0;
315     if (dims>1 && params.reverse[1])
316     idx1 = edges[dims-2]-num1-idx1;
317     if (dims>2 && params.reverse[2])
318     idx2 = edges[dims-3]-num2-idx2;
319    
320    
321 caltinay 4013 vector<double> values(num0*num1*num2);
322     if (dims==3) {
323     var->set_cur(idx2, idx1, idx0);
324     var->get(&values[0], num2, num1, num0);
325     } else if (dims==2) {
326     var->set_cur(idx1, idx0);
327     var->get(&values[0], num1, num0);
328     } else {
329     var->set_cur(idx0);
330     var->get(&values[0], num0);
331     }
332    
333     const int dpp = out.getNumDataPointsPerSample();
334     out.requireWrite();
335    
336 caltinay 4618 // helpers for reversing
337     const int x0 = (params.reverse[0] ? num0-1 : 0);
338     const int x_mult = (params.reverse[0] ? -1 : 1);
339     const int y0 = (params.reverse[1] ? num1-1 : 0);
340     const int y_mult = (params.reverse[1] ? -1 : 1);
341     const int z0 = (params.reverse[2] ? num2-1 : 0);
342     const int z_mult = (params.reverse[2] ? -1 : 1);
343    
344 caltinay 4013 for (index_t z=0; z<num2; z++) {
345     for (index_t y=0; y<num1; y++) {
346     #pragma omp parallel for
347     for (index_t x=0; x<num0; x++) {
348 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
349     +(first1+y*params.multiplier[1])*myN0
350     +(first2+z*params.multiplier[2])*myN0*myN1;
351 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
352     +(y0+y_mult*y)*num0
353     +(x0+x_mult*x);
354 caltinay 4174 if (!isnan(values[srcIndex])) {
355 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
356     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
357     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
358 caltinay 4277 const int dataIndex = baseIndex+m0
359     +m1*myN0
360     +m2*myN0*myN1;
361     double* dest = out.getSampleDataRW(dataIndex);
362     for (index_t q=0; q<dpp; q++) {
363     *dest++ = values[srcIndex];
364     }
365     }
366     }
367 caltinay 4174 }
368 caltinay 4013 }
369     }
370     }
371     }
372     #else
373     throw RipleyException("readNcGrid(): not compiled with netCDF support");
374     #endif
375     }
376    
377 sshaw 4738 #ifdef USE_BOOSTIO
378     void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
379     const ReaderParameters& params) const
380     {
381     // the mapping is not universally correct but should work on our
382     // supported platforms
383     switch (params.dataType) {
384     case DATATYPE_INT32:
385     readBinaryGridZippedImpl<int>(out, filename, params);
386     break;
387     case DATATYPE_FLOAT32:
388     readBinaryGridZippedImpl<float>(out, filename, params);
389     break;
390     case DATATYPE_FLOAT64:
391     readBinaryGridZippedImpl<double>(out, filename, params);
392     break;
393     default:
394     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
395     }
396     }
397     #endif
398    
399 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
400 caltinay 4618 const ReaderParameters& params) const
401 caltinay 4334 {
402 caltinay 4495 // the mapping is not universally correct but should work on our
403     // supported platforms
404 caltinay 4615 switch (params.dataType) {
405 caltinay 4495 case DATATYPE_INT32:
406 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
407 caltinay 4495 break;
408     case DATATYPE_FLOAT32:
409 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
410 caltinay 4495 break;
411     case DATATYPE_FLOAT64:
412 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
413 caltinay 4495 break;
414     default:
415     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
416     }
417     }
418    
419     template<typename ValueType>
420     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
421 caltinay 4618 const ReaderParameters& params) const
422 caltinay 4495 {
423 caltinay 4334 // check destination function space
424     int myN0, myN1, myN2;
425     if (out.getFunctionSpace().getTypeCode() == Nodes) {
426     myN0 = m_NN[0];
427     myN1 = m_NN[1];
428     myN2 = m_NN[2];
429     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
430     out.getFunctionSpace().getTypeCode() == ReducedElements) {
431     myN0 = m_NE[0];
432     myN1 = m_NE[1];
433     myN2 = m_NE[2];
434     } else
435     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
436    
437 caltinay 4615 if (params.first.size() != 3)
438 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
439    
440 caltinay 4615 if (params.numValues.size() != 3)
441 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
442    
443 caltinay 4615 if (params.multiplier.size() != 3)
444 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
445 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
446     if (params.multiplier[i]<1)
447 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
448    
449     // check file existence and size
450     ifstream f(filename.c_str(), ifstream::binary);
451     if (f.fail()) {
452     throw RipleyException("readBinaryGrid(): cannot open file");
453     }
454     f.seekg(0, ios::end);
455     const int numComp = out.getDataPointSize();
456     const int filesize = f.tellg();
457 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
458 caltinay 4334 if (filesize < reqsize) {
459     f.close();
460     throw RipleyException("readBinaryGrid(): not enough data in file");
461     }
462    
463     // check if this rank contributes anything
464 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
465     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
466     params.first[1] >= m_offset[1]+myN1 ||
467     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
468     params.first[2] >= m_offset[2]+myN2 ||
469     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
470 caltinay 4334 f.close();
471     return;
472     }
473    
474     // now determine how much this rank has to write
475    
476     // first coordinates in data object to write to
477 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
478     const int first1 = max(0, params.first[1]-m_offset[1]);
479     const int first2 = max(0, params.first[2]-m_offset[2]);
480 caltinay 4334 // indices to first value in file
481 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
482     const int idx1 = max(0, m_offset[1]-params.first[1]);
483     const int idx2 = max(0, m_offset[2]-params.first[2]);
484 caltinay 4334 // number of values to read
485 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
486     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
487     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
488 caltinay 4334
489     out.requireWrite();
490 caltinay 4495 vector<ValueType> values(num0*numComp);
491 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
492    
493 caltinay 4529 for (int z=0; z<num2; z++) {
494     for (int y=0; y<num1; y++) {
495 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
496     +(idx2+z)*params.numValues[0]*params.numValues[1]);
497 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
498     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
499 caltinay 4334
500 caltinay 4529 for (int x=0; x<num0; x++) {
501 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
502     +(first1+y*params.multiplier[1])*myN0
503     +(first2+z*params.multiplier[2])*myN0*myN1;
504     for (int m2=0; m2<params.multiplier[2]; m2++) {
505     for (int m1=0; m1<params.multiplier[1]; m1++) {
506     for (int m0=0; m0<params.multiplier[0]; m0++) {
507 caltinay 4334 const int dataIndex = baseIndex+m0
508     +m1*myN0
509     +m2*myN0*myN1;
510     double* dest = out.getSampleDataRW(dataIndex);
511 caltinay 4529 for (int c=0; c<numComp; c++) {
512     ValueType val = values[x*numComp+c];
513    
514 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
515 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
516     // this will alter val!!
517     byte_swap32(cval);
518     }
519     if (!std::isnan(val)) {
520     for (int q=0; q<dpp; q++) {
521     *dest++ = static_cast<double>(val);
522 caltinay 4334 }
523     }
524     }
525     }
526     }
527     }
528     }
529     }
530     }
531    
532     f.close();
533     }
534    
535 sshaw 4738 #ifdef USE_BOOSTIO
536     template<typename ValueType>
537     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
538     const ReaderParameters& params) const
539     {
540     // check destination function space
541     int myN0, myN1, myN2;
542     if (out.getFunctionSpace().getTypeCode() == Nodes) {
543     myN0 = m_NN[0];
544     myN1 = m_NN[1];
545     myN2 = m_NN[2];
546     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
547     out.getFunctionSpace().getTypeCode() == ReducedElements) {
548     myN0 = m_NE[0];
549     myN1 = m_NE[1];
550     myN2 = m_NE[2];
551     } else
552     throw RipleyException("readBinaryGridFromZipped(): invalid function space for output data object");
553    
554     if (params.first.size() != 3)
555     throw RipleyException("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
556    
557     if (params.numValues.size() != 3)
558     throw RipleyException("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
559    
560     if (params.multiplier.size() != 3)
561     throw RipleyException("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
562     for (size_t i=0; i<params.multiplier.size(); i++)
563     if (params.multiplier[i]<1)
564     throw RipleyException("readBinaryGridFromZipped(): all multipliers must be positive");
565    
566     // check file existence and size
567     ifstream f(filename.c_str(), ifstream::binary);
568     if (f.fail()) {
569     throw RipleyException("readBinaryGridFromZipped(): cannot open file");
570     }
571     f.seekg(0, ios::end);
572     const int numComp = out.getDataPointSize();
573     int filesize = f.tellg();
574     f.seekg(0, ios::beg);
575     std::vector<char> compressed(filesize);
576     f.read((char*)&compressed[0], filesize);
577     f.close();
578     std::vector<char> decompressed = unzip(compressed);
579     filesize = decompressed.size();
580     const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
581     if (filesize < reqsize) {
582     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
583     }
584    
585     // check if this rank contributes anything
586     if (params.first[0] >= m_offset[0]+myN0 ||
587     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
588     params.first[1] >= m_offset[1]+myN1 ||
589     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
590     params.first[2] >= m_offset[2]+myN2 ||
591     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
592     return;
593     }
594    
595     // now determine how much this rank has to write
596    
597     // first coordinates in data object to write to
598     const int first0 = max(0, params.first[0]-m_offset[0]);
599     const int first1 = max(0, params.first[1]-m_offset[1]);
600     const int first2 = max(0, params.first[2]-m_offset[2]);
601     // indices to first value in file
602     const int idx0 = max(0, m_offset[0]-params.first[0]);
603     const int idx1 = max(0, m_offset[1]-params.first[1]);
604     const int idx2 = max(0, m_offset[2]-params.first[2]);
605     // number of values to read
606     const int num0 = min(params.numValues[0]-idx0, myN0-first0);
607     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
608     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
609    
610     out.requireWrite();
611     vector<ValueType> values(num0*numComp);
612     const int dpp = out.getNumDataPointsPerSample();
613    
614     for (int z=0; z<num2; z++) {
615     for (int y=0; y<num1; y++) {
616     const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
617     +(idx2+z)*params.numValues[0]*params.numValues[1]);
618     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
619    
620     for (int x=0; x<num0; x++) {
621     const int baseIndex = first0+x*params.multiplier[0]
622     +(first1+y*params.multiplier[1])*myN0
623     +(first2+z*params.multiplier[2])*myN0*myN1;
624     for (int m2=0; m2<params.multiplier[2]; m2++) {
625     for (int m1=0; m1<params.multiplier[1]; m1++) {
626     for (int m0=0; m0<params.multiplier[0]; m0++) {
627     const int dataIndex = baseIndex+m0
628     +m1*myN0
629     +m2*myN0*myN1;
630     double* dest = out.getSampleDataRW(dataIndex);
631     for (int c=0; c<numComp; c++) {
632     ValueType val = values[x*numComp+c];
633    
634     if (params.byteOrder != BYTEORDER_NATIVE) {
635     char* cval = reinterpret_cast<char*>(&val);
636     // this will alter val!!
637     byte_swap32(cval);
638     }
639     if (!std::isnan(val)) {
640     for (int q=0; q<dpp; q++) {
641     *dest++ = static_cast<double>(val);
642     }
643     }
644     }
645     }
646     }
647     }
648     }
649     }
650     }
651     }
652     #endif
653    
654 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
655     int byteOrder, int dataType) const
656 caltinay 4334 {
657 caltinay 4357 // the mapping is not universally correct but should work on our
658     // supported platforms
659     switch (dataType) {
660     case DATATYPE_INT32:
661     writeBinaryGridImpl<int>(in, filename, byteOrder);
662     break;
663     case DATATYPE_FLOAT32:
664     writeBinaryGridImpl<float>(in, filename, byteOrder);
665     break;
666     case DATATYPE_FLOAT64:
667     writeBinaryGridImpl<double>(in, filename, byteOrder);
668     break;
669     default:
670     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
671     }
672     }
673    
674     template<typename ValueType>
675     void Brick::writeBinaryGridImpl(const escript::Data& in,
676     const string& filename, int byteOrder) const
677     {
678 caltinay 4334 // check function space and determine number of points
679     int myN0, myN1, myN2;
680     int totalN0, totalN1, totalN2;
681     if (in.getFunctionSpace().getTypeCode() == Nodes) {
682     myN0 = m_NN[0];
683     myN1 = m_NN[1];
684     myN2 = m_NN[2];
685     totalN0 = m_gNE[0]+1;
686     totalN1 = m_gNE[1]+1;
687     totalN2 = m_gNE[2]+1;
688     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
689     in.getFunctionSpace().getTypeCode() == ReducedElements) {
690     myN0 = m_NE[0];
691     myN1 = m_NE[1];
692     myN2 = m_NE[2];
693     totalN0 = m_gNE[0];
694     totalN1 = m_gNE[1];
695     totalN2 = m_gNE[2];
696     } else
697     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
698    
699     const int numComp = in.getDataPointSize();
700     const int dpp = in.getNumDataPointsPerSample();
701 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
702 caltinay 4334
703     if (numComp > 1 || dpp > 1)
704     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
705    
706     // from here on we know that each sample consists of one value
707 caltinay 4482 FileWriter fw;
708     fw.openFile(filename, fileSize);
709 caltinay 4334 MPIBarrier();
710    
711     for (index_t z=0; z<myN2; z++) {
712     for (index_t y=0; y<myN1; y++) {
713     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
714 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
715 caltinay 4334 ostringstream oss;
716    
717     for (index_t x=0; x<myN0; x++) {
718 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
719 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
720     if (byteOrder == BYTEORDER_NATIVE) {
721 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
722     } else {
723     char* value = reinterpret_cast<char*>(&fvalue);
724 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
725 caltinay 4334 }
726     }
727 caltinay 4482 fw.writeAt(oss, fileofs);
728 caltinay 4334 }
729     }
730 caltinay 4482 fw.close();
731 caltinay 4334 }
732    
733 caltinay 3691 void Brick::dump(const string& fileName) const
734     {
735     #if USE_SILO
736     string fn(fileName);
737     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
738     fn+=".silo";
739     }
740    
741     int driver=DB_HDF5;
742     string siloPath;
743     DBfile* dbfile = NULL;
744    
745     #ifdef ESYS_MPI
746     PMPIO_baton_t* baton = NULL;
747 gross 3793 const int NUM_SILO_FILES = 1;
748     const char* blockDirFmt = "/block%04d";
749 caltinay 3691 #endif
750    
751     if (m_mpiInfo->size > 1) {
752     #ifdef ESYS_MPI
753     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
754     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
755     PMPIO_DefaultClose, (void*)&driver);
756     // try the fallback driver in case of error
757     if (!baton && driver != DB_PDB) {
758     driver = DB_PDB;
759     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
760     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
761     PMPIO_DefaultClose, (void*)&driver);
762     }
763     if (baton) {
764     char str[64];
765     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
766     siloPath = str;
767     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
768     }
769     #endif
770     } else {
771     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
772     getDescription().c_str(), driver);
773     // try the fallback driver in case of error
774     if (!dbfile && driver != DB_PDB) {
775     driver = DB_PDB;
776     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
777     getDescription().c_str(), driver);
778     }
779     }
780    
781     if (!dbfile)
782     throw RipleyException("dump: Could not create Silo file");
783    
784     /*
785     if (driver==DB_HDF5) {
786     // gzip level 1 already provides good compression with minimal
787     // performance penalty. Some tests showed that gzip levels >3 performed
788     // rather badly on escript data both in terms of time and space
789     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
790     }
791     */
792    
793 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
794     boost::scoped_ptr<double> y(new double[m_NN[1]]);
795     boost::scoped_ptr<double> z(new double[m_NN[2]]);
796 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
797     #pragma omp parallel
798     {
799     #pragma omp for
800 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
801     coords[0][i0]=getLocalCoordinate(i0, 0);
802 caltinay 3691 }
803     #pragma omp for
804 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
805     coords[1][i1]=getLocalCoordinate(i1, 1);
806 caltinay 3691 }
807     #pragma omp for
808 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
809     coords[2][i2]=getLocalCoordinate(i2, 2);
810 caltinay 3691 }
811     }
812 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
813    
814     // write mesh
815     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
816 caltinay 3691 DB_COLLINEAR, NULL);
817    
818 caltinay 4334 // write node ids
819     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
820 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
821 caltinay 3691
822 caltinay 3698 // write element ids
823 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
824 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
825 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
826 caltinay 3698
827     // rank 0 writes multimesh and multivar
828 caltinay 3691 if (m_mpiInfo->rank == 0) {
829     vector<string> tempstrings;
830     vector<char*> names;
831     for (dim_t i=0; i<m_mpiInfo->size; i++) {
832     stringstream path;
833     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
834     tempstrings.push_back(path.str());
835     names.push_back((char*)tempstrings.back().c_str());
836     }
837     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
838     DBSetDir(dbfile, "/");
839     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
840     &types[0], NULL);
841     tempstrings.clear();
842     names.clear();
843     for (dim_t i=0; i<m_mpiInfo->size; i++) {
844     stringstream path;
845     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
846     tempstrings.push_back(path.str());
847     names.push_back((char*)tempstrings.back().c_str());
848     }
849     types.assign(m_mpiInfo->size, DB_QUADVAR);
850     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
851     &types[0], NULL);
852 caltinay 3698 tempstrings.clear();
853     names.clear();
854     for (dim_t i=0; i<m_mpiInfo->size; i++) {
855     stringstream path;
856     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
857     tempstrings.push_back(path.str());
858     names.push_back((char*)tempstrings.back().c_str());
859     }
860     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
861     &types[0], NULL);
862 caltinay 3691 }
863    
864     if (m_mpiInfo->size > 1) {
865     #ifdef ESYS_MPI
866     PMPIO_HandOffBaton(baton, dbfile);
867     PMPIO_Finish(baton);
868     #endif
869     } else {
870     DBClose(dbfile);
871     }
872    
873     #else // USE_SILO
874 caltinay 3791 throw RipleyException("dump: no Silo support");
875 caltinay 3691 #endif
876     }
877    
878     const int* Brick::borrowSampleReferenceIDs(int fsType) const
879     {
880 caltinay 3697 switch (fsType) {
881     case Nodes:
882 caltinay 3748 case ReducedNodes: //FIXME: reduced
883 caltinay 3753 return &m_nodeId[0];
884 caltinay 3757 case DegreesOfFreedom:
885     case ReducedDegreesOfFreedom: //FIXME: reduced
886 caltinay 3753 return &m_dofId[0];
887 caltinay 3697 case Elements:
888 caltinay 3733 case ReducedElements:
889 caltinay 3697 return &m_elementId[0];
890 caltinay 3757 case FaceElements:
891 caltinay 3733 case ReducedFaceElements:
892 caltinay 3697 return &m_faceId[0];
893 sshaw 4660 case Points:
894     return &m_diracPointNodeIDs[0];
895 caltinay 3697 default:
896     break;
897     }
898 caltinay 3691
899 caltinay 3697 stringstream msg;
900 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
901 caltinay 3697 throw RipleyException(msg.str());
902 caltinay 3691 }
903    
904 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
905 caltinay 3691 {
906 caltinay 3759 if (getMPISize()==1)
907     return true;
908    
909 caltinay 3757 switch (fsType) {
910     case Nodes:
911     case ReducedNodes: //FIXME: reduced
912     return (m_dofMap[id] < getNumDOF());
913     case DegreesOfFreedom:
914     case ReducedDegreesOfFreedom:
915     return true;
916     case Elements:
917     case ReducedElements:
918     {
919     // check ownership of element's _last_ node
920 caltinay 4334 const index_t x=id%m_NE[0] + 1;
921     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
922     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
923     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
924 caltinay 3757 }
925     case FaceElements:
926     case ReducedFaceElements:
927 caltinay 3759 {
928     // check ownership of face element's last node
929     dim_t n=0;
930 caltinay 4334 for (size_t i=0; i<6; i++) {
931     n+=m_faceCount[i];
932 caltinay 3759 if (id<n) {
933 caltinay 4334 const index_t j=id-n+m_faceCount[i];
934 caltinay 3759 if (i>=4) { // front or back
935 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
936     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
937 caltinay 3759 } else if (i>=2) { // bottom or top
938 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
939     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
940 caltinay 3759 } else { // left or right
941 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
942     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
943 caltinay 3759 }
944     }
945     }
946     return false;
947     }
948 caltinay 3757 default:
949     break;
950 caltinay 3753 }
951 caltinay 3757
952     stringstream msg;
953 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
954 caltinay 3757 throw RipleyException(msg.str());
955 caltinay 3691 }
956    
957 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
958 caltinay 3703 {
959 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
960     out.requireWrite();
961     #pragma omp parallel
962     {
963     if (m_faceOffset[0] > -1) {
964     #pragma omp for nowait
965 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
966     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
967     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
968 caltinay 3764 // set vector at four quadrature points
969     *o++ = -1.; *o++ = 0.; *o++ = 0.;
970     *o++ = -1.; *o++ = 0.; *o++ = 0.;
971     *o++ = -1.; *o++ = 0.; *o++ = 0.;
972     *o++ = -1.; *o++ = 0.; *o = 0.;
973     }
974     }
975     }
976    
977     if (m_faceOffset[1] > -1) {
978     #pragma omp for nowait
979 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
980     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
981     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
982 caltinay 3764 // set vector at four quadrature points
983     *o++ = 1.; *o++ = 0.; *o++ = 0.;
984     *o++ = 1.; *o++ = 0.; *o++ = 0.;
985     *o++ = 1.; *o++ = 0.; *o++ = 0.;
986     *o++ = 1.; *o++ = 0.; *o = 0.;
987     }
988     }
989     }
990    
991     if (m_faceOffset[2] > -1) {
992     #pragma omp for nowait
993 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
994     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
995     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
996 caltinay 3764 // set vector at four quadrature points
997     *o++ = 0.; *o++ = -1.; *o++ = 0.;
998     *o++ = 0.; *o++ = -1.; *o++ = 0.;
999     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1000     *o++ = 0.; *o++ = -1.; *o = 0.;
1001     }
1002     }
1003     }
1004    
1005     if (m_faceOffset[3] > -1) {
1006     #pragma omp for nowait
1007 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1008     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1009     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1010 caltinay 3764 // set vector at four quadrature points
1011     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1012     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1013     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1014     *o++ = 0.; *o++ = 1.; *o = 0.;
1015     }
1016     }
1017     }
1018    
1019     if (m_faceOffset[4] > -1) {
1020     #pragma omp for nowait
1021 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1022     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1023     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1024 caltinay 3764 // set vector at four quadrature points
1025     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1026     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1027     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1028     *o++ = 0.; *o++ = 0.; *o = -1.;
1029     }
1030     }
1031     }
1032    
1033     if (m_faceOffset[5] > -1) {
1034     #pragma omp for nowait
1035 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1036     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1037     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1038 caltinay 3764 // set vector at four quadrature points
1039     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1040     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1041     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1042     *o++ = 0.; *o++ = 0.; *o = 1.;
1043     }
1044     }
1045     }
1046     } // end of parallel section
1047     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1048     out.requireWrite();
1049     #pragma omp parallel
1050     {
1051     if (m_faceOffset[0] > -1) {
1052     #pragma omp for nowait
1053 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1054     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1055     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1056 caltinay 3764 *o++ = -1.;
1057     *o++ = 0.;
1058     *o = 0.;
1059     }
1060     }
1061     }
1062    
1063     if (m_faceOffset[1] > -1) {
1064     #pragma omp for nowait
1065 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1066     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1067     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1068 caltinay 3764 *o++ = 1.;
1069     *o++ = 0.;
1070     *o = 0.;
1071     }
1072     }
1073     }
1074    
1075     if (m_faceOffset[2] > -1) {
1076     #pragma omp for nowait
1077 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1078     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1079     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1080 caltinay 3764 *o++ = 0.;
1081     *o++ = -1.;
1082     *o = 0.;
1083     }
1084     }
1085     }
1086    
1087     if (m_faceOffset[3] > -1) {
1088     #pragma omp for nowait
1089 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1090     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1091     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1092 caltinay 3764 *o++ = 0.;
1093     *o++ = 1.;
1094     *o = 0.;
1095     }
1096     }
1097     }
1098    
1099     if (m_faceOffset[4] > -1) {
1100     #pragma omp for nowait
1101 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1102     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1103     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1104 caltinay 3764 *o++ = 0.;
1105     *o++ = 0.;
1106     *o = -1.;
1107     }
1108     }
1109     }
1110    
1111     if (m_faceOffset[5] > -1) {
1112     #pragma omp for nowait
1113 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1114     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1115     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1116 caltinay 3764 *o++ = 0.;
1117     *o++ = 0.;
1118     *o = 1.;
1119     }
1120     }
1121     }
1122     } // end of parallel section
1123    
1124     } else {
1125     stringstream msg;
1126 caltinay 3791 msg << "setToNormal: invalid function space type "
1127     << out.getFunctionSpace().getTypeCode();
1128 caltinay 3764 throw RipleyException(msg.str());
1129     }
1130     }
1131    
1132     void Brick::setToSize(escript::Data& out) const
1133     {
1134     if (out.getFunctionSpace().getTypeCode() == Elements
1135     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1136     out.requireWrite();
1137     const dim_t numQuad=out.getNumDataPointsPerSample();
1138 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1139 caltinay 3764 #pragma omp parallel for
1140     for (index_t k = 0; k < getNumElements(); ++k) {
1141     double* o = out.getSampleDataRW(k);
1142     fill(o, o+numQuad, size);
1143     }
1144     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1145     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1146     out.requireWrite();
1147     const dim_t numQuad=out.getNumDataPointsPerSample();
1148     #pragma omp parallel
1149     {
1150     if (m_faceOffset[0] > -1) {
1151 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1152 caltinay 3764 #pragma omp for nowait
1153 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1154     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1155     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1156 caltinay 3764 fill(o, o+numQuad, size);
1157     }
1158     }
1159     }
1160    
1161     if (m_faceOffset[1] > -1) {
1162 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1163 caltinay 3764 #pragma omp for nowait
1164 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1165     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1166     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1167 caltinay 3764 fill(o, o+numQuad, size);
1168     }
1169     }
1170     }
1171    
1172     if (m_faceOffset[2] > -1) {
1173 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1174 caltinay 3764 #pragma omp for nowait
1175 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1176     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1177     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1178 caltinay 3764 fill(o, o+numQuad, size);
1179     }
1180     }
1181     }
1182    
1183     if (m_faceOffset[3] > -1) {
1184 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1185 caltinay 3764 #pragma omp for nowait
1186 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1187     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1188     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1189 caltinay 3764 fill(o, o+numQuad, size);
1190     }
1191     }
1192     }
1193    
1194     if (m_faceOffset[4] > -1) {
1195 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1196 caltinay 3764 #pragma omp for nowait
1197 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1198     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1199     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1200 caltinay 3764 fill(o, o+numQuad, size);
1201     }
1202     }
1203     }
1204    
1205     if (m_faceOffset[5] > -1) {
1206 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1207 caltinay 3764 #pragma omp for nowait
1208 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1209     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1210     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1211 caltinay 3764 fill(o, o+numQuad, size);
1212     }
1213     }
1214     }
1215     } // end of parallel section
1216    
1217     } else {
1218     stringstream msg;
1219 caltinay 3791 msg << "setToSize: invalid function space type "
1220     << out.getFunctionSpace().getTypeCode();
1221 caltinay 3764 throw RipleyException(msg.str());
1222     }
1223     }
1224    
1225     void Brick::Print_Mesh_Info(const bool full) const
1226     {
1227     RipleyDomain::Print_Mesh_Info(full);
1228     if (full) {
1229     cout << " Id Coordinates" << endl;
1230     cout.precision(15);
1231     cout.setf(ios::scientific, ios::floatfield);
1232     for (index_t i=0; i < getNumNodes(); i++) {
1233     cout << " " << setw(5) << m_nodeId[i]
1234 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1235     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1236     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1237 caltinay 3764 }
1238     }
1239     }
1240    
1241    
1242     //protected
1243     void Brick::assembleCoordinates(escript::Data& arg) const
1244     {
1245     escriptDataC x = arg.getDataC();
1246     int numDim = m_numDim;
1247     if (!isDataPointShapeEqual(&x, 1, &numDim))
1248     throw RipleyException("setToX: Invalid Data object shape");
1249     if (!numSamplesEqual(&x, 1, getNumNodes()))
1250     throw RipleyException("setToX: Illegal number of samples in Data object");
1251    
1252     arg.requireWrite();
1253     #pragma omp parallel for
1254 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1255     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1256     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1257     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1258     point[0] = getLocalCoordinate(i0, 0);
1259     point[1] = getLocalCoordinate(i1, 1);
1260     point[2] = getLocalCoordinate(i2, 2);
1261 caltinay 3764 }
1262     }
1263     }
1264     }
1265    
1266     //protected
1267 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1268 caltinay 3764 {
1269 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1270 caltinay 3731 const double C0 = .044658198738520451079;
1271     const double C1 = .16666666666666666667;
1272     const double C2 = .21132486540518711775;
1273     const double C3 = .25;
1274     const double C4 = .5;
1275     const double C5 = .62200846792814621559;
1276     const double C6 = .78867513459481288225;
1277    
1278 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1279 caltinay 3760 out.requireWrite();
1280 caltinay 3913 #pragma omp parallel
1281     {
1282     vector<double> f_000(numComp);
1283     vector<double> f_001(numComp);
1284     vector<double> f_010(numComp);
1285     vector<double> f_011(numComp);
1286     vector<double> f_100(numComp);
1287     vector<double> f_101(numComp);
1288     vector<double> f_110(numComp);
1289     vector<double> f_111(numComp);
1290     #pragma omp for
1291 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1292     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1293     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1294     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1301     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1302     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1303 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1304 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1305     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1307     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1308     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1309     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1310     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1311     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1312     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1313     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1314     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1315     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1316 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1317     o[INDEX3(i,1,0,numComp,3)] = V4;
1318     o[INDEX3(i,2,0,numComp,3)] = V8;
1319     o[INDEX3(i,0,1,numComp,3)] = V0;
1320     o[INDEX3(i,1,1,numComp,3)] = V5;
1321     o[INDEX3(i,2,1,numComp,3)] = V9;
1322     o[INDEX3(i,0,2,numComp,3)] = V1;
1323     o[INDEX3(i,1,2,numComp,3)] = V4;
1324     o[INDEX3(i,2,2,numComp,3)] = V10;
1325     o[INDEX3(i,0,3,numComp,3)] = V1;
1326     o[INDEX3(i,1,3,numComp,3)] = V5;
1327     o[INDEX3(i,2,3,numComp,3)] = V11;
1328     o[INDEX3(i,0,4,numComp,3)] = V2;
1329     o[INDEX3(i,1,4,numComp,3)] = V6;
1330     o[INDEX3(i,2,4,numComp,3)] = V8;
1331     o[INDEX3(i,0,5,numComp,3)] = V2;
1332     o[INDEX3(i,1,5,numComp,3)] = V7;
1333     o[INDEX3(i,2,5,numComp,3)] = V9;
1334     o[INDEX3(i,0,6,numComp,3)] = V3;
1335     o[INDEX3(i,1,6,numComp,3)] = V6;
1336     o[INDEX3(i,2,6,numComp,3)] = V10;
1337     o[INDEX3(i,0,7,numComp,3)] = V3;
1338     o[INDEX3(i,1,7,numComp,3)] = V7;
1339     o[INDEX3(i,2,7,numComp,3)] = V11;
1340     } // end of component loop i
1341     } // end of k0 loop
1342     } // end of k1 loop
1343     } // end of k2 loop
1344     } // end of parallel section
1345 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1346 caltinay 3760 out.requireWrite();
1347 caltinay 3913 #pragma omp parallel
1348     {
1349     vector<double> f_000(numComp);
1350     vector<double> f_001(numComp);
1351     vector<double> f_010(numComp);
1352     vector<double> f_011(numComp);
1353     vector<double> f_100(numComp);
1354     vector<double> f_101(numComp);
1355     vector<double> f_110(numComp);
1356     vector<double> f_111(numComp);
1357     #pragma omp for
1358 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1359     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1360     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1361     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1369     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1370 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1371 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1372     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1373     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1374 caltinay 3913 } // end of component loop i
1375     } // end of k0 loop
1376     } // end of k1 loop
1377     } // end of k2 loop
1378     } // end of parallel section
1379 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1380 caltinay 3760 out.requireWrite();
1381 caltinay 3722 #pragma omp parallel
1382     {
1383 caltinay 3913 vector<double> f_000(numComp);
1384     vector<double> f_001(numComp);
1385     vector<double> f_010(numComp);
1386     vector<double> f_011(numComp);
1387     vector<double> f_100(numComp);
1388     vector<double> f_101(numComp);
1389     vector<double> f_110(numComp);
1390     vector<double> f_111(numComp);
1391 caltinay 3722 if (m_faceOffset[0] > -1) {
1392     #pragma omp for nowait
1393 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1394     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1395     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1402     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1403     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1404 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1405 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1406     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1407     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1408     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1409     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1410 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1411     o[INDEX3(i,2,0,numComp,3)] = V2;
1412 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1413 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1414     o[INDEX3(i,2,1,numComp,3)] = V3;
1415 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1416 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1417     o[INDEX3(i,2,2,numComp,3)] = V2;
1418 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1419 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1420     o[INDEX3(i,2,3,numComp,3)] = V3;
1421 caltinay 3764 } // end of component loop i
1422     } // end of k1 loop
1423     } // end of k2 loop
1424     } // end of face 0
1425 caltinay 3722 if (m_faceOffset[1] > -1) {
1426     #pragma omp for nowait
1427 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1428     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1429     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1436     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1437     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1438 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1439 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1440     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1441     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1442     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1443     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1444 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1445     o[INDEX3(i,2,0,numComp,3)] = V2;
1446 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1447 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1448     o[INDEX3(i,2,1,numComp,3)] = V3;
1449 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1450 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1451     o[INDEX3(i,2,2,numComp,3)] = V2;
1452 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1453 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1454     o[INDEX3(i,2,3,numComp,3)] = V3;
1455 caltinay 3764 } // end of component loop i
1456     } // end of k1 loop
1457     } // end of k2 loop
1458     } // end of face 1
1459 caltinay 3722 if (m_faceOffset[2] > -1) {
1460     #pragma omp for nowait
1461 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1462     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1463     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1470     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1472 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1473 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1474     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1475     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1476 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1477 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1478 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1479     o[INDEX3(i,0,1,numComp,3)] = V0;
1480 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1481 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1482     o[INDEX3(i,0,2,numComp,3)] = V0;
1483 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1484 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1485     o[INDEX3(i,0,3,numComp,3)] = V0;
1486 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1487 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1488 caltinay 3764 } // end of component loop i
1489     } // end of k0 loop
1490     } // end of k2 loop
1491     } // end of face 2
1492 caltinay 3722 if (m_faceOffset[3] > -1) {
1493     #pragma omp for nowait
1494 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1495     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1496     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1505 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1506 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1507     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1508     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1509     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1510 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1511 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1512 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1513     o[INDEX3(i,0,1,numComp,3)] = V0;
1514 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1515 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1516     o[INDEX3(i,0,2,numComp,3)] = V1;
1517 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1518 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1519     o[INDEX3(i,0,3,numComp,3)] = V1;
1520 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1521 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1522 caltinay 3764 } // end of component loop i
1523     } // end of k0 loop
1524     } // end of k2 loop
1525     } // end of face 3
1526 caltinay 3722 if (m_faceOffset[4] > -1) {
1527     #pragma omp for nowait
1528 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1529     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1530     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1538     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1539 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1540 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1541     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1542     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1543     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1544 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1545     o[INDEX3(i,1,0,numComp,3)] = V2;
1546 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1547 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1548     o[INDEX3(i,1,1,numComp,3)] = V3;
1549 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1550 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1551     o[INDEX3(i,1,2,numComp,3)] = V2;
1552 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1553 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1554     o[INDEX3(i,1,3,numComp,3)] = V3;
1555 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1556 caltinay 3764 } // end of component loop i
1557     } // end of k0 loop
1558     } // end of k1 loop
1559     } // end of face 4
1560 caltinay 3722 if (m_faceOffset[5] > -1) {
1561     #pragma omp for nowait
1562 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1563     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1564     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1565     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1566     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1567     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1568     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1569     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1570     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1571     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1572     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1573 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1574 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1575     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1576     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1577     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1578 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1579     o[INDEX3(i,1,0,numComp,3)] = V2;
1580 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1581 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1582     o[INDEX3(i,1,1,numComp,3)] = V3;
1583 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1584 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1585     o[INDEX3(i,1,2,numComp,3)] = V2;
1586 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1587 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1588     o[INDEX3(i,1,3,numComp,3)] = V3;
1589 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1590 caltinay 3764 } // end of component loop i
1591     } // end of k0 loop
1592     } // end of k1 loop
1593     } // end of face 5
1594 caltinay 3722 } // end of parallel section
1595 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1596 caltinay 3760 out.requireWrite();
1597 caltinay 3722 #pragma omp parallel
1598     {
1599 caltinay 3913 vector<double> f_000(numComp);
1600     vector<double> f_001(numComp);
1601     vector<double> f_010(numComp);
1602     vector<double> f_011(numComp);
1603     vector<double> f_100(numComp);
1604