/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4712 - (hide annotations)
Wed Feb 26 04:08:41 2014 UTC (5 years ago) by sshaw
Original Path: trunk/ripley/src/Brick.cpp
File size: 157468 byte(s)
adding skeleton of fast Lame assemblers
adjusted automatic ripley domain subdivision ( should solve issue #94 )
more tab->space conversions

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 sshaw 4712 #include <ripley/LameAssembler3D.h>
23     #include <ripley/domainhelpers.h>
24 caltinay 4477 #include <boost/scoped_array.hpp>
25    
26 caltinay 4013 #ifdef USE_NETCDF
27     #include <netcdfcpp.h>
28     #endif
29    
30 caltinay 3691 #if USE_SILO
31     #include <silo.h>
32     #ifdef ESYS_MPI
33     #include <pmpio.h>
34     #endif
35     #endif
36    
37     #include <iomanip>
38    
39 jfenwick 4650 #include "esysUtils/EsysRandom.h"
40     #include "blocktools.h"
41    
42    
43 caltinay 3691 using namespace std;
44 caltinay 4334 using esysUtils::FileWriter;
45 caltinay 3691
46     namespace ripley {
47    
48 sshaw 4712 int indexOfMax(int a, int b, int c) {
49     if (a > b) {
50     if (c > a) {
51     return 2;
52     }
53     return 0;
54     } else if (b > c) {
55     return 1;
56     }
57     return 2;
58     }
59    
60 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
61 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
62     const std::vector<double>& points, const std::vector<int>& tags,
63 jfenwick 4638 const simap_t& tagnamestonums) :
64 caltinay 4334 RipleyDomain(3)
65 caltinay 3691 {
66 caltinay 3943 // ignore subdivision parameters for serial run
67     if (m_mpiInfo->size == 1) {
68     d0=1;
69     d1=1;
70     d2=1;
71     }
72     bool warn=false;
73 sshaw 4712
74     std::vector<int> factors;
75     int ranks = m_mpiInfo->size;
76     int epr[3] = {n0,n1,n2};
77     int d[3] = {d0,d1,d2};
78     if (d0<=0 || d1<=0 || d2<=0) {
79     for (int i = 0; i < 3; i++) {
80     if (d[i] < 1) {
81     d[i] = 1;
82     continue;
83 caltinay 3943 }
84 sshaw 4712 epr[i] = -1; // can no longer be max
85     if (ranks % d[i] != 0) {
86     throw RipleyException("Invalid number of spatial subdivisions");
87 caltinay 3943 }
88 sshaw 4712 //remove
89     ranks /= d[i];
90 caltinay 3943 }
91 sshaw 4712 factorise(factors, ranks);
92     if (factors.size() != 0) {
93     warn = true;
94 caltinay 3943 }
95 sshaw 4712 }
96     while (factors.size() > 0) {
97     int i = indexOfMax(epr[0],epr[1],epr[2]);
98     int f = factors.back();
99     factors.pop_back();
100     d[i] *= f;
101     epr[i] /= f;
102 caltinay 3943 }
103 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
104 caltinay 3943
105 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
106     // among number of ranks
107 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
108 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
109 sshaw 4712 }
110 caltinay 3943 if (warn) {
111     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
112     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
113     }
114 caltinay 3691
115 caltinay 4334 double l0 = x1-x0;
116     double l1 = y1-y0;
117     double l2 = z1-z0;
118     m_dx[0] = l0/n0;
119     m_dx[1] = l1/n1;
120     m_dx[2] = l2/n2;
121    
122     if ((n0+1)%d0 > 0) {
123 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
124 caltinay 4334 l0=m_dx[0]*n0;
125 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
126 caltinay 4334 << n0 << ", l0=" << l0 << endl;
127 caltinay 3943 }
128 caltinay 4334 if ((n1+1)%d1 > 0) {
129 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
130 caltinay 4334 l1=m_dx[1]*n1;
131 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
132 caltinay 4334 << n1 << ", l1=" << l1 << endl;
133 caltinay 3943 }
134 caltinay 4334 if ((n2+1)%d2 > 0) {
135 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
136 caltinay 4334 l2=m_dx[2]*n2;
137 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
138 caltinay 4334 << n2 << ", l2=" << l2 << endl;
139 caltinay 3943 }
140    
141 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
142 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
143    
144 caltinay 4334 m_gNE[0] = n0;
145     m_gNE[1] = n1;
146     m_gNE[2] = n2;
147     m_origin[0] = x0;
148     m_origin[1] = y0;
149     m_origin[2] = z0;
150     m_length[0] = l0;
151     m_length[1] = l1;
152     m_length[2] = l2;
153     m_NX[0] = d0;
154     m_NX[1] = d1;
155     m_NX[2] = d2;
156    
157 caltinay 3753 // local number of elements (including overlap)
158 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
159     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
160     m_NE[0]++;
161     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
162     m_ownNE[0]--;
163 caltinay 3764
164 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
165     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
166     m_NE[1]++;
167     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
168     m_ownNE[1]--;
169 caltinay 3764
170 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
171     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
172     m_NE[2]++;
173     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
174     m_ownNE[2]--;
175 caltinay 3753
176     // local number of nodes
177 caltinay 4334 m_NN[0] = m_NE[0]+1;
178     m_NN[1] = m_NE[1]+1;
179     m_NN[2] = m_NE[2]+1;
180 caltinay 3753
181 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
182 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
183     if (m_offset[0] > 0)
184     m_offset[0]--;
185     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
186     if (m_offset[1] > 0)
187     m_offset[1]--;
188     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
189     if (m_offset[2] > 0)
190     m_offset[2]--;
191 caltinay 3753
192 caltinay 3691 populateSampleIds();
193 caltinay 3756 createPattern();
194 sshaw 4622
195     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
196 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
197     i != tagnamestonums.end(); i++) {
198     setTagMap(i->first, i->second);
199     }
200 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
201 caltinay 3691 }
202    
203    
204     Brick::~Brick()
205     {
206 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
207     Paso_Connector_free(m_connector);
208 sshaw 4622 delete assembler;
209 caltinay 3691 }
210    
211     string Brick::getDescription() const
212     {
213     return "ripley::Brick";
214     }
215    
216     bool Brick::operator==(const AbstractDomain& other) const
217     {
218 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
219     if (o) {
220     return (RipleyDomain::operator==(other) &&
221 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
222     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
223     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
224     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
225 caltinay 3744 }
226 caltinay 3691
227     return false;
228     }
229    
230 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
231 caltinay 4618 const ReaderParameters& params) const
232 caltinay 4013 {
233     #ifdef USE_NETCDF
234     // check destination function space
235     int myN0, myN1, myN2;
236     if (out.getFunctionSpace().getTypeCode() == Nodes) {
237 caltinay 4334 myN0 = m_NN[0];
238     myN1 = m_NN[1];
239     myN2 = m_NN[2];
240 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
241     out.getFunctionSpace().getTypeCode() == ReducedElements) {
242 caltinay 4334 myN0 = m_NE[0];
243     myN1 = m_NE[1];
244     myN2 = m_NE[2];
245 caltinay 4013 } else
246     throw RipleyException("readNcGrid(): invalid function space for output data object");
247    
248 caltinay 4615 if (params.first.size() != 3)
249 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
250    
251 caltinay 4615 if (params.numValues.size() != 3)
252 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
253    
254 caltinay 4615 if (params.multiplier.size() != 3)
255 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
256 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
257     if (params.multiplier[i]<1)
258 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
259    
260 caltinay 4013 // check file existence and size
261     NcFile f(filename.c_str(), NcFile::ReadOnly);
262     if (!f.is_valid())
263     throw RipleyException("readNcGrid(): cannot open file");
264    
265     NcVar* var = f.get_var(varname.c_str());
266     if (!var)
267     throw RipleyException("readNcGrid(): invalid variable name");
268    
269     // TODO: rank>0 data support
270     const int numComp = out.getDataPointSize();
271     if (numComp > 1)
272     throw RipleyException("readNcGrid(): only scalar data supported");
273    
274     const int dims = var->num_dims();
275 caltinay 4477 boost::scoped_array<long> edges(var->edges());
276 caltinay 4013
277     // is this a slice of the data object (dims!=3)?
278     // note the expected ordering of edges (as in numpy: z,y,x)
279 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
280     params.numValues[1] > edges[1] ||
281     params.numValues[0] > edges[2]))
282     || (dims==2 && params.numValues[2]>1)
283     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
284 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
285     }
286    
287     // check if this rank contributes anything
288 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
289     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
290     params.first[1] >= m_offset[1]+myN1 ||
291     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
292     params.first[2] >= m_offset[2]+myN2 ||
293     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
294 caltinay 4013 return;
295     }
296    
297     // now determine how much this rank has to write
298    
299     // first coordinates in data object to write to
300 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
301     const int first1 = max(0, params.first[1]-m_offset[1]);
302     const int first2 = max(0, params.first[2]-m_offset[2]);
303 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
304     int idx0 = max(0, m_offset[0]-params.first[0]);
305     int idx1 = max(0, m_offset[1]-params.first[1]);
306     int idx2 = max(0, m_offset[2]-params.first[2]);
307 caltinay 4277 // number of values to read
308 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
309     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
310     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
311 caltinay 4013
312 caltinay 4618 // make sure we read the right block if going backwards through file
313     if (params.reverse[0])
314     idx0 = edges[dims-1]-num0-idx0;
315     if (dims>1 && params.reverse[1])
316     idx1 = edges[dims-2]-num1-idx1;
317     if (dims>2 && params.reverse[2])
318     idx2 = edges[dims-3]-num2-idx2;
319    
320    
321 caltinay 4013 vector<double> values(num0*num1*num2);
322     if (dims==3) {
323     var->set_cur(idx2, idx1, idx0);
324     var->get(&values[0], num2, num1, num0);
325     } else if (dims==2) {
326     var->set_cur(idx1, idx0);
327     var->get(&values[0], num1, num0);
328     } else {
329     var->set_cur(idx0);
330     var->get(&values[0], num0);
331     }
332    
333     const int dpp = out.getNumDataPointsPerSample();
334     out.requireWrite();
335    
336 caltinay 4618 // helpers for reversing
337     const int x0 = (params.reverse[0] ? num0-1 : 0);
338     const int x_mult = (params.reverse[0] ? -1 : 1);
339     const int y0 = (params.reverse[1] ? num1-1 : 0);
340     const int y_mult = (params.reverse[1] ? -1 : 1);
341     const int z0 = (params.reverse[2] ? num2-1 : 0);
342     const int z_mult = (params.reverse[2] ? -1 : 1);
343    
344 caltinay 4013 for (index_t z=0; z<num2; z++) {
345     for (index_t y=0; y<num1; y++) {
346     #pragma omp parallel for
347     for (index_t x=0; x<num0; x++) {
348 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
349     +(first1+y*params.multiplier[1])*myN0
350     +(first2+z*params.multiplier[2])*myN0*myN1;
351 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
352     +(y0+y_mult*y)*num0
353     +(x0+x_mult*x);
354 caltinay 4174 if (!isnan(values[srcIndex])) {
355 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
356     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
357     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
358 caltinay 4277 const int dataIndex = baseIndex+m0
359     +m1*myN0
360     +m2*myN0*myN1;
361     double* dest = out.getSampleDataRW(dataIndex);
362     for (index_t q=0; q<dpp; q++) {
363     *dest++ = values[srcIndex];
364     }
365     }
366     }
367 caltinay 4174 }
368 caltinay 4013 }
369     }
370     }
371     }
372     #else
373     throw RipleyException("readNcGrid(): not compiled with netCDF support");
374     #endif
375     }
376    
377 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
378 caltinay 4618 const ReaderParameters& params) const
379 caltinay 4334 {
380 caltinay 4495 // the mapping is not universally correct but should work on our
381     // supported platforms
382 caltinay 4615 switch (params.dataType) {
383 caltinay 4495 case DATATYPE_INT32:
384 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
385 caltinay 4495 break;
386     case DATATYPE_FLOAT32:
387 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
388 caltinay 4495 break;
389     case DATATYPE_FLOAT64:
390 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
391 caltinay 4495 break;
392     default:
393     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
394     }
395     }
396    
397     template<typename ValueType>
398     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
399 caltinay 4618 const ReaderParameters& params) const
400 caltinay 4495 {
401 caltinay 4334 // check destination function space
402     int myN0, myN1, myN2;
403     if (out.getFunctionSpace().getTypeCode() == Nodes) {
404     myN0 = m_NN[0];
405     myN1 = m_NN[1];
406     myN2 = m_NN[2];
407     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
408     out.getFunctionSpace().getTypeCode() == ReducedElements) {
409     myN0 = m_NE[0];
410     myN1 = m_NE[1];
411     myN2 = m_NE[2];
412     } else
413     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
414    
415 caltinay 4615 if (params.first.size() != 3)
416 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
417    
418 caltinay 4615 if (params.numValues.size() != 3)
419 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
420    
421 caltinay 4615 if (params.multiplier.size() != 3)
422 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
423 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
424     if (params.multiplier[i]<1)
425 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
426    
427     // check file existence and size
428     ifstream f(filename.c_str(), ifstream::binary);
429     if (f.fail()) {
430     throw RipleyException("readBinaryGrid(): cannot open file");
431     }
432     f.seekg(0, ios::end);
433     const int numComp = out.getDataPointSize();
434     const int filesize = f.tellg();
435 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
436 caltinay 4334 if (filesize < reqsize) {
437     f.close();
438     throw RipleyException("readBinaryGrid(): not enough data in file");
439     }
440    
441     // check if this rank contributes anything
442 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
443     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
444     params.first[1] >= m_offset[1]+myN1 ||
445     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
446     params.first[2] >= m_offset[2]+myN2 ||
447     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
448 caltinay 4334 f.close();
449     return;
450     }
451    
452     // now determine how much this rank has to write
453    
454     // first coordinates in data object to write to
455 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
456     const int first1 = max(0, params.first[1]-m_offset[1]);
457     const int first2 = max(0, params.first[2]-m_offset[2]);
458 caltinay 4334 // indices to first value in file
459 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
460     const int idx1 = max(0, m_offset[1]-params.first[1]);
461     const int idx2 = max(0, m_offset[2]-params.first[2]);
462 caltinay 4334 // number of values to read
463 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
464     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
465     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
466 caltinay 4334
467     out.requireWrite();
468 caltinay 4495 vector<ValueType> values(num0*numComp);
469 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
470    
471 caltinay 4529 for (int z=0; z<num2; z++) {
472     for (int y=0; y<num1; y++) {
473 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
474     +(idx2+z)*params.numValues[0]*params.numValues[1]);
475 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
476     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
477 caltinay 4334
478 caltinay 4529 for (int x=0; x<num0; x++) {
479 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
480     +(first1+y*params.multiplier[1])*myN0
481     +(first2+z*params.multiplier[2])*myN0*myN1;
482     for (int m2=0; m2<params.multiplier[2]; m2++) {
483     for (int m1=0; m1<params.multiplier[1]; m1++) {
484     for (int m0=0; m0<params.multiplier[0]; m0++) {
485 caltinay 4334 const int dataIndex = baseIndex+m0
486     +m1*myN0
487     +m2*myN0*myN1;
488     double* dest = out.getSampleDataRW(dataIndex);
489 caltinay 4529 for (int c=0; c<numComp; c++) {
490     ValueType val = values[x*numComp+c];
491    
492 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
493 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
494     // this will alter val!!
495     byte_swap32(cval);
496     }
497     if (!std::isnan(val)) {
498     for (int q=0; q<dpp; q++) {
499     *dest++ = static_cast<double>(val);
500 caltinay 4334 }
501     }
502     }
503     }
504     }
505     }
506     }
507     }
508     }
509    
510     f.close();
511     }
512    
513 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
514     int byteOrder, int dataType) const
515 caltinay 4334 {
516 caltinay 4357 // the mapping is not universally correct but should work on our
517     // supported platforms
518     switch (dataType) {
519     case DATATYPE_INT32:
520     writeBinaryGridImpl<int>(in, filename, byteOrder);
521     break;
522     case DATATYPE_FLOAT32:
523     writeBinaryGridImpl<float>(in, filename, byteOrder);
524     break;
525     case DATATYPE_FLOAT64:
526     writeBinaryGridImpl<double>(in, filename, byteOrder);
527     break;
528     default:
529     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
530     }
531     }
532    
533     template<typename ValueType>
534     void Brick::writeBinaryGridImpl(const escript::Data& in,
535     const string& filename, int byteOrder) const
536     {
537 caltinay 4334 // check function space and determine number of points
538     int myN0, myN1, myN2;
539     int totalN0, totalN1, totalN2;
540     if (in.getFunctionSpace().getTypeCode() == Nodes) {
541     myN0 = m_NN[0];
542     myN1 = m_NN[1];
543     myN2 = m_NN[2];
544     totalN0 = m_gNE[0]+1;
545     totalN1 = m_gNE[1]+1;
546     totalN2 = m_gNE[2]+1;
547     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
548     in.getFunctionSpace().getTypeCode() == ReducedElements) {
549     myN0 = m_NE[0];
550     myN1 = m_NE[1];
551     myN2 = m_NE[2];
552     totalN0 = m_gNE[0];
553     totalN1 = m_gNE[1];
554     totalN2 = m_gNE[2];
555     } else
556     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
557    
558     const int numComp = in.getDataPointSize();
559     const int dpp = in.getNumDataPointsPerSample();
560 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
561 caltinay 4334
562     if (numComp > 1 || dpp > 1)
563     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
564    
565     // from here on we know that each sample consists of one value
566 caltinay 4482 FileWriter fw;
567     fw.openFile(filename, fileSize);
568 caltinay 4334 MPIBarrier();
569    
570     for (index_t z=0; z<myN2; z++) {
571     for (index_t y=0; y<myN1; y++) {
572     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
573 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
574 caltinay 4334 ostringstream oss;
575    
576     for (index_t x=0; x<myN0; x++) {
577 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
578 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
579     if (byteOrder == BYTEORDER_NATIVE) {
580 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
581     } else {
582     char* value = reinterpret_cast<char*>(&fvalue);
583 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
584 caltinay 4334 }
585     }
586 caltinay 4482 fw.writeAt(oss, fileofs);
587 caltinay 4334 }
588     }
589 caltinay 4482 fw.close();
590 caltinay 4334 }
591    
592 caltinay 3691 void Brick::dump(const string& fileName) const
593     {
594     #if USE_SILO
595     string fn(fileName);
596     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
597     fn+=".silo";
598     }
599    
600     int driver=DB_HDF5;
601     string siloPath;
602     DBfile* dbfile = NULL;
603    
604     #ifdef ESYS_MPI
605     PMPIO_baton_t* baton = NULL;
606 gross 3793 const int NUM_SILO_FILES = 1;
607     const char* blockDirFmt = "/block%04d";
608 caltinay 3691 #endif
609    
610     if (m_mpiInfo->size > 1) {
611     #ifdef ESYS_MPI
612     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
613     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
614     PMPIO_DefaultClose, (void*)&driver);
615     // try the fallback driver in case of error
616     if (!baton && driver != DB_PDB) {
617     driver = DB_PDB;
618     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
619     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
620     PMPIO_DefaultClose, (void*)&driver);
621     }
622     if (baton) {
623     char str[64];
624     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
625     siloPath = str;
626     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
627     }
628     #endif
629     } else {
630     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
631     getDescription().c_str(), driver);
632     // try the fallback driver in case of error
633     if (!dbfile && driver != DB_PDB) {
634     driver = DB_PDB;
635     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
636     getDescription().c_str(), driver);
637     }
638     }
639    
640     if (!dbfile)
641     throw RipleyException("dump: Could not create Silo file");
642    
643     /*
644     if (driver==DB_HDF5) {
645     // gzip level 1 already provides good compression with minimal
646     // performance penalty. Some tests showed that gzip levels >3 performed
647     // rather badly on escript data both in terms of time and space
648     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
649     }
650     */
651    
652 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
653     boost::scoped_ptr<double> y(new double[m_NN[1]]);
654     boost::scoped_ptr<double> z(new double[m_NN[2]]);
655 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
656     #pragma omp parallel
657     {
658     #pragma omp for
659 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
660     coords[0][i0]=getLocalCoordinate(i0, 0);
661 caltinay 3691 }
662     #pragma omp for
663 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
664     coords[1][i1]=getLocalCoordinate(i1, 1);
665 caltinay 3691 }
666     #pragma omp for
667 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
668     coords[2][i2]=getLocalCoordinate(i2, 2);
669 caltinay 3691 }
670     }
671 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
672    
673     // write mesh
674     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
675 caltinay 3691 DB_COLLINEAR, NULL);
676    
677 caltinay 4334 // write node ids
678     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
679 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
680 caltinay 3691
681 caltinay 3698 // write element ids
682 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
683 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
684 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
685 caltinay 3698
686     // rank 0 writes multimesh and multivar
687 caltinay 3691 if (m_mpiInfo->rank == 0) {
688     vector<string> tempstrings;
689     vector<char*> names;
690     for (dim_t i=0; i<m_mpiInfo->size; i++) {
691     stringstream path;
692     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
693     tempstrings.push_back(path.str());
694     names.push_back((char*)tempstrings.back().c_str());
695     }
696     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
697     DBSetDir(dbfile, "/");
698     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
699     &types[0], NULL);
700     tempstrings.clear();
701     names.clear();
702     for (dim_t i=0; i<m_mpiInfo->size; i++) {
703     stringstream path;
704     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
705     tempstrings.push_back(path.str());
706     names.push_back((char*)tempstrings.back().c_str());
707     }
708     types.assign(m_mpiInfo->size, DB_QUADVAR);
709     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
710     &types[0], NULL);
711 caltinay 3698 tempstrings.clear();
712     names.clear();
713     for (dim_t i=0; i<m_mpiInfo->size; i++) {
714     stringstream path;
715     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
716     tempstrings.push_back(path.str());
717     names.push_back((char*)tempstrings.back().c_str());
718     }
719     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
720     &types[0], NULL);
721 caltinay 3691 }
722    
723     if (m_mpiInfo->size > 1) {
724     #ifdef ESYS_MPI
725     PMPIO_HandOffBaton(baton, dbfile);
726     PMPIO_Finish(baton);
727     #endif
728     } else {
729     DBClose(dbfile);
730     }
731    
732     #else // USE_SILO
733 caltinay 3791 throw RipleyException("dump: no Silo support");
734 caltinay 3691 #endif
735     }
736    
737     const int* Brick::borrowSampleReferenceIDs(int fsType) const
738     {
739 caltinay 3697 switch (fsType) {
740     case Nodes:
741 caltinay 3748 case ReducedNodes: //FIXME: reduced
742 caltinay 3753 return &m_nodeId[0];
743 caltinay 3757 case DegreesOfFreedom:
744     case ReducedDegreesOfFreedom: //FIXME: reduced
745 caltinay 3753 return &m_dofId[0];
746 caltinay 3697 case Elements:
747 caltinay 3733 case ReducedElements:
748 caltinay 3697 return &m_elementId[0];
749 caltinay 3757 case FaceElements:
750 caltinay 3733 case ReducedFaceElements:
751 caltinay 3697 return &m_faceId[0];
752 sshaw 4660 case Points:
753     return &m_diracPointNodeIDs[0];
754 caltinay 3697 default:
755     break;
756     }
757 caltinay 3691
758 caltinay 3697 stringstream msg;
759 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
760 caltinay 3697 throw RipleyException(msg.str());
761 caltinay 3691 }
762    
763 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
764 caltinay 3691 {
765 caltinay 3759 if (getMPISize()==1)
766     return true;
767    
768 caltinay 3757 switch (fsType) {
769     case Nodes:
770     case ReducedNodes: //FIXME: reduced
771     return (m_dofMap[id] < getNumDOF());
772     case DegreesOfFreedom:
773     case ReducedDegreesOfFreedom:
774     return true;
775     case Elements:
776     case ReducedElements:
777     {
778     // check ownership of element's _last_ node
779 caltinay 4334 const index_t x=id%m_NE[0] + 1;
780     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
781     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
782     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
783 caltinay 3757 }
784     case FaceElements:
785     case ReducedFaceElements:
786 caltinay 3759 {
787     // check ownership of face element's last node
788     dim_t n=0;
789 caltinay 4334 for (size_t i=0; i<6; i++) {
790     n+=m_faceCount[i];
791 caltinay 3759 if (id<n) {
792 caltinay 4334 const index_t j=id-n+m_faceCount[i];
793 caltinay 3759 if (i>=4) { // front or back
794 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
795     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
796 caltinay 3759 } else if (i>=2) { // bottom or top
797 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
798     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
799 caltinay 3759 } else { // left or right
800 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
801     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
802 caltinay 3759 }
803     }
804     }
805     return false;
806     }
807 caltinay 3757 default:
808     break;
809 caltinay 3753 }
810 caltinay 3757
811     stringstream msg;
812 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
813 caltinay 3757 throw RipleyException(msg.str());
814 caltinay 3691 }
815    
816 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
817 caltinay 3703 {
818 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
819     out.requireWrite();
820     #pragma omp parallel
821     {
822     if (m_faceOffset[0] > -1) {
823     #pragma omp for nowait
824 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
825     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
826     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
827 caltinay 3764 // set vector at four quadrature points
828     *o++ = -1.; *o++ = 0.; *o++ = 0.;
829     *o++ = -1.; *o++ = 0.; *o++ = 0.;
830     *o++ = -1.; *o++ = 0.; *o++ = 0.;
831     *o++ = -1.; *o++ = 0.; *o = 0.;
832     }
833     }
834     }
835    
836     if (m_faceOffset[1] > -1) {
837     #pragma omp for nowait
838 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
839     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
840     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
841 caltinay 3764 // set vector at four quadrature points
842     *o++ = 1.; *o++ = 0.; *o++ = 0.;
843     *o++ = 1.; *o++ = 0.; *o++ = 0.;
844     *o++ = 1.; *o++ = 0.; *o++ = 0.;
845     *o++ = 1.; *o++ = 0.; *o = 0.;
846     }
847     }
848     }
849    
850     if (m_faceOffset[2] > -1) {
851     #pragma omp for nowait
852 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
853     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
854     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
855 caltinay 3764 // set vector at four quadrature points
856     *o++ = 0.; *o++ = -1.; *o++ = 0.;
857     *o++ = 0.; *o++ = -1.; *o++ = 0.;
858     *o++ = 0.; *o++ = -1.; *o++ = 0.;
859     *o++ = 0.; *o++ = -1.; *o = 0.;
860     }
861     }
862     }
863    
864     if (m_faceOffset[3] > -1) {
865     #pragma omp for nowait
866 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
867     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
868     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
869 caltinay 3764 // set vector at four quadrature points
870     *o++ = 0.; *o++ = 1.; *o++ = 0.;
871     *o++ = 0.; *o++ = 1.; *o++ = 0.;
872     *o++ = 0.; *o++ = 1.; *o++ = 0.;
873     *o++ = 0.; *o++ = 1.; *o = 0.;
874     }
875     }
876     }
877    
878     if (m_faceOffset[4] > -1) {
879     #pragma omp for nowait
880 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
881     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
882     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
883 caltinay 3764 // set vector at four quadrature points
884     *o++ = 0.; *o++ = 0.; *o++ = -1.;
885     *o++ = 0.; *o++ = 0.; *o++ = -1.;
886     *o++ = 0.; *o++ = 0.; *o++ = -1.;
887     *o++ = 0.; *o++ = 0.; *o = -1.;
888     }
889     }
890     }
891    
892     if (m_faceOffset[5] > -1) {
893     #pragma omp for nowait
894 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
895     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
896     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
897 caltinay 3764 // set vector at four quadrature points
898     *o++ = 0.; *o++ = 0.; *o++ = 1.;
899     *o++ = 0.; *o++ = 0.; *o++ = 1.;
900     *o++ = 0.; *o++ = 0.; *o++ = 1.;
901     *o++ = 0.; *o++ = 0.; *o = 1.;
902     }
903     }
904     }
905     } // end of parallel section
906     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
907     out.requireWrite();
908     #pragma omp parallel
909     {
910     if (m_faceOffset[0] > -1) {
911     #pragma omp for nowait
912 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
913     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
914     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
915 caltinay 3764 *o++ = -1.;
916     *o++ = 0.;
917     *o = 0.;
918     }
919     }
920     }
921    
922     if (m_faceOffset[1] > -1) {
923     #pragma omp for nowait
924 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
925     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
926     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
927 caltinay 3764 *o++ = 1.;
928     *o++ = 0.;
929     *o = 0.;
930     }
931     }
932     }
933    
934     if (m_faceOffset[2] > -1) {
935     #pragma omp for nowait
936 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
937     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
938     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
939 caltinay 3764 *o++ = 0.;
940     *o++ = -1.;
941     *o = 0.;
942     }
943     }
944     }
945    
946     if (m_faceOffset[3] > -1) {
947     #pragma omp for nowait
948 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
949     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
950     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
951 caltinay 3764 *o++ = 0.;
952     *o++ = 1.;
953     *o = 0.;
954     }
955     }
956     }
957    
958     if (m_faceOffset[4] > -1) {
959     #pragma omp for nowait
960 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
961     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
962     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
963 caltinay 3764 *o++ = 0.;
964     *o++ = 0.;
965     *o = -1.;
966     }
967     }
968     }
969    
970     if (m_faceOffset[5] > -1) {
971     #pragma omp for nowait
972 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
973     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
974     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
975 caltinay 3764 *o++ = 0.;
976     *o++ = 0.;
977     *o = 1.;
978     }
979     }
980     }
981     } // end of parallel section
982    
983     } else {
984     stringstream msg;
985 caltinay 3791 msg << "setToNormal: invalid function space type "
986     << out.getFunctionSpace().getTypeCode();
987 caltinay 3764 throw RipleyException(msg.str());
988     }
989     }
990    
991     void Brick::setToSize(escript::Data& out) const
992     {
993     if (out.getFunctionSpace().getTypeCode() == Elements
994     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
995     out.requireWrite();
996     const dim_t numQuad=out.getNumDataPointsPerSample();
997 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
998 caltinay 3764 #pragma omp parallel for
999     for (index_t k = 0; k < getNumElements(); ++k) {
1000     double* o = out.getSampleDataRW(k);
1001     fill(o, o+numQuad, size);
1002     }
1003     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1004     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1005     out.requireWrite();
1006     const dim_t numQuad=out.getNumDataPointsPerSample();
1007     #pragma omp parallel
1008     {
1009     if (m_faceOffset[0] > -1) {
1010 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1011 caltinay 3764 #pragma omp for nowait
1012 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1013     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1014     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1015 caltinay 3764 fill(o, o+numQuad, size);
1016     }
1017     }
1018     }
1019    
1020     if (m_faceOffset[1] > -1) {
1021 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1022 caltinay 3764 #pragma omp for nowait
1023 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1024     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1025     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1026 caltinay 3764 fill(o, o+numQuad, size);
1027     }
1028     }
1029     }
1030    
1031     if (m_faceOffset[2] > -1) {
1032 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1033 caltinay 3764 #pragma omp for nowait
1034 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1035     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1036     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1037 caltinay 3764 fill(o, o+numQuad, size);
1038     }
1039     }
1040     }
1041    
1042     if (m_faceOffset[3] > -1) {
1043 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1044 caltinay 3764 #pragma omp for nowait
1045 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1046     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1047     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1048 caltinay 3764 fill(o, o+numQuad, size);
1049     }
1050     }
1051     }
1052    
1053     if (m_faceOffset[4] > -1) {
1054 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1055 caltinay 3764 #pragma omp for nowait
1056 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1057     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1058     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1059 caltinay 3764 fill(o, o+numQuad, size);
1060     }
1061     }
1062     }
1063    
1064     if (m_faceOffset[5] > -1) {
1065 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1066 caltinay 3764 #pragma omp for nowait
1067 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1068     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1069     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1070 caltinay 3764 fill(o, o+numQuad, size);
1071     }
1072     }
1073     }
1074     } // end of parallel section
1075    
1076     } else {
1077     stringstream msg;
1078 caltinay 3791 msg << "setToSize: invalid function space type "
1079     << out.getFunctionSpace().getTypeCode();
1080 caltinay 3764 throw RipleyException(msg.str());
1081     }
1082     }
1083    
1084     void Brick::Print_Mesh_Info(const bool full) const
1085     {
1086     RipleyDomain::Print_Mesh_Info(full);
1087     if (full) {
1088     cout << " Id Coordinates" << endl;
1089     cout.precision(15);
1090     cout.setf(ios::scientific, ios::floatfield);
1091     for (index_t i=0; i < getNumNodes(); i++) {
1092     cout << " " << setw(5) << m_nodeId[i]
1093 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1094     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1095     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1096 caltinay 3764 }
1097     }
1098     }
1099    
1100    
1101     //protected
1102     void Brick::assembleCoordinates(escript::Data& arg) const
1103     {
1104     escriptDataC x = arg.getDataC();
1105     int numDim = m_numDim;
1106     if (!isDataPointShapeEqual(&x, 1, &numDim))
1107     throw RipleyException("setToX: Invalid Data object shape");
1108     if (!numSamplesEqual(&x, 1, getNumNodes()))
1109     throw RipleyException("setToX: Illegal number of samples in Data object");
1110    
1111     arg.requireWrite();
1112     #pragma omp parallel for
1113 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1114     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1115     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1116     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1117     point[0] = getLocalCoordinate(i0, 0);
1118     point[1] = getLocalCoordinate(i1, 1);
1119     point[2] = getLocalCoordinate(i2, 2);
1120 caltinay 3764 }
1121     }
1122     }
1123     }
1124    
1125     //protected
1126 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1127 caltinay 3764 {
1128 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1129 caltinay 3731 const double C0 = .044658198738520451079;
1130     const double C1 = .16666666666666666667;
1131     const double C2 = .21132486540518711775;
1132     const double C3 = .25;
1133     const double C4 = .5;
1134     const double C5 = .62200846792814621559;
1135     const double C6 = .78867513459481288225;
1136    
1137 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1138 caltinay 3760 out.requireWrite();
1139 caltinay 3913 #pragma omp parallel
1140     {
1141     vector<double> f_000(numComp);
1142     vector<double> f_001(numComp);
1143     vector<double> f_010(numComp);
1144     vector<double> f_011(numComp);
1145     vector<double> f_100(numComp);
1146     vector<double> f_101(numComp);
1147     vector<double> f_110(numComp);
1148     vector<double> f_111(numComp);
1149     #pragma omp for
1150 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1151     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1152     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1153     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1154     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1155     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1156     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1157     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1158     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1159     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1160     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1161     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1162 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1163 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1164     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1165     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1166     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1167     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1168     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1169     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1170     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1171     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1172     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1173     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1174     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1175 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1176     o[INDEX3(i,1,0,numComp,3)] = V4;
1177     o[INDEX3(i,2,0,numComp,3)] = V8;
1178     o[INDEX3(i,0,1,numComp,3)] = V0;
1179     o[INDEX3(i,1,1,numComp,3)] = V5;
1180     o[INDEX3(i,2,1,numComp,3)] = V9;
1181     o[INDEX3(i,0,2,numComp,3)] = V1;
1182     o[INDEX3(i,1,2,numComp,3)] = V4;
1183     o[INDEX3(i,2,2,numComp,3)] = V10;
1184     o[INDEX3(i,0,3,numComp,3)] = V1;
1185     o[INDEX3(i,1,3,numComp,3)] = V5;
1186     o[INDEX3(i,2,3,numComp,3)] = V11;
1187     o[INDEX3(i,0,4,numComp,3)] = V2;
1188     o[INDEX3(i,1,4,numComp,3)] = V6;
1189     o[INDEX3(i,2,4,numComp,3)] = V8;
1190     o[INDEX3(i,0,5,numComp,3)] = V2;
1191     o[INDEX3(i,1,5,numComp,3)] = V7;
1192     o[INDEX3(i,2,5,numComp,3)] = V9;
1193     o[INDEX3(i,0,6,numComp,3)] = V3;
1194     o[INDEX3(i,1,6,numComp,3)] = V6;
1195     o[INDEX3(i,2,6,numComp,3)] = V10;
1196     o[INDEX3(i,0,7,numComp,3)] = V3;
1197     o[INDEX3(i,1,7,numComp,3)] = V7;
1198     o[INDEX3(i,2,7,numComp,3)] = V11;
1199     } // end of component loop i
1200     } // end of k0 loop
1201     } // end of k1 loop
1202     } // end of k2 loop
1203     } // end of parallel section
1204 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1205 caltinay 3760 out.requireWrite();
1206 caltinay 3913 #pragma omp parallel
1207     {
1208     vector<double> f_000(numComp);
1209     vector<double> f_001(numComp);
1210     vector<double> f_010(numComp);
1211     vector<double> f_011(numComp);
1212     vector<double> f_100(numComp);
1213     vector<double> f_101(numComp);
1214     vector<double> f_110(numComp);
1215     vector<double> f_111(numComp);
1216     #pragma omp for
1217 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1218     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1219     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1220     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1221     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1222     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1223     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1224     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1225     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1226     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1227     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1228     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1229 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1230 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1231     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1232     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1233 caltinay 3913 } // end of component loop i
1234     } // end of k0 loop
1235     } // end of k1 loop
1236     } // end of k2 loop
1237     } // end of parallel section
1238 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1239 caltinay 3760 out.requireWrite();
1240 caltinay 3722 #pragma omp parallel
1241     {
1242 caltinay 3913 vector<double> f_000(numComp);
1243     vector<double> f_001(numComp);
1244     vector<double> f_010(numComp);
1245     vector<double> f_011(numComp);
1246     vector<double> f_100(numComp);
1247     vector<double> f_101(numComp);
1248     vector<double> f_110(numComp);
1249     vector<double> f_111(numComp);
1250 caltinay 3722 if (m_faceOffset[0] > -1) {
1251     #pragma omp for nowait
1252 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1253     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1254     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1255     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1263 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1264 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1265     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1266     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1267     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1268     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1269 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1270     o[INDEX3(i,2,0,numComp,3)] = V2;
1271 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1272 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1273     o[INDEX3(i,2,1,numComp,3)] = V3;
1274 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1275 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1276     o[INDEX3(i,2,2,numComp,3)] = V2;
1277 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1278 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1279     o[INDEX3(i,2,3,numComp,3)] = V3;
1280 caltinay 3764 } // end of component loop i
1281     } // end of k1 loop
1282     } // end of k2 loop
1283     } // end of face 0
1284 caltinay 3722 if (m_faceOffset[1] > -1) {
1285     #pragma omp for nowait
1286 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1287     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1288     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1289     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1297 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1298 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1299     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1300     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1301     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1302     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1303 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1304     o[INDEX3(i,2,0,numComp,3)] = V2;
1305 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1307     o[INDEX3(i,2,1,numComp,3)] = V3;
1308 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1309 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1310     o[INDEX3(i,2,2,numComp,3)] = V2;
1311 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1312 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1313     o[INDEX3(i,2,3,numComp,3)] = V3;
1314 caltinay 3764 } // end of component loop i
1315     } // end of k1 loop
1316     } // end of k2 loop
1317     } // end of face 1
1318 caltinay 3722 if (m_faceOffset[2] > -1) {
1319     #pragma omp for nowait
1320 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1321     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1322     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1323     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1331 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1332 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1333     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1334     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1335 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1336 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1337 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1338     o[INDEX3(i,0,1,numComp,3)] = V0;
1339 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1340 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1341     o[INDEX3(i,0,2,numComp,3)] = V0;
1342 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1343 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1344     o[INDEX3(i,0,3,numComp,3)] = V0;
1345 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1346 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1347 caltinay 3764 } // end of component loop i
1348     } // end of k0 loop
1349     } // end of k2 loop
1350     } // end of face 2
1351 caltinay 3722 if (m_faceOffset[3] > -1) {
1352     #pragma omp for nowait
1353 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1354     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1355     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1357     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1364 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1365 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1366     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1367     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1368     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1369 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1370 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1371 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1372     o[INDEX3(i,0,1,numComp,3)] = V0;
1373 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1374 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1375     o[INDEX3(i,0,2,numComp,3)] = V1;
1376 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1377 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1378     o[INDEX3(i,0,3,numComp,3)] = V1;
1379 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1380 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1381 caltinay 3764 } // end of component loop i
1382     } // end of k0 loop
1383     } // end of k2 loop
1384     } // end of face 3
1385 caltinay 3722 if (m_faceOffset[4] > -1) {
1386     #pragma omp for nowait
1387 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1388     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1389     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1390     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1398 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1399 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1400     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1401     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1402     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1403 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1404     o[INDEX3(i,1,0,numComp,3)] = V2;
1405 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1406 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1407     o[INDEX3(i,1,1,numComp,3)] = V3;
1408 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1409 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1410     o[INDEX3(i,1,2,numComp,3)] = V2;
1411 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1412 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1413     o[INDEX3(i,1,3,numComp,3)] = V3;
1414 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1415 caltinay 3764 } // end of component loop i
1416     } // end of k0 loop
1417     } // end of k1 loop
1418     } // end of face 4
1419 caltinay 3722 if (m_faceOffset[5] > -1) {
1420     #pragma omp for nowait
1421 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1422     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1423     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1424     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1432 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1433 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1434     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1435     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1436     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1437 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1438     o[INDEX3(i,1,0,numComp,3)] = V2;
1439 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1440 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1441     o[INDEX3(i,1,1,numComp,3)] = V3;
1442 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1443 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1444     o[INDEX3(i,1,2,numComp,3)] = V2;
1445 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1446 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1447     o[INDEX3(i,1,3,numComp,3)] = V3;
1448 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1449 caltinay 3764 } // end of component loop i
1450     } // end of k0 loop
1451     } // end of k1 loop
1452     } // end of face 5
1453 caltinay 3722 } // end of parallel section
1454 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1455 caltinay 3760 out.requireWrite();
1456 caltinay 3722 #pragma omp parallel
1457     {
1458 caltinay 3913 vector<double> f_000(numComp);
1459     vector<double> f_001(numComp);
1460     vector<double> f_010(numComp);
1461     vector<double> f_011(numComp);
1462     vector<double> f_100(numComp);
1463     vector<double> f_101(numComp);
1464     vector<double> f_110(numComp);
1465     vector<double> f_111(numComp);
1466 caltinay 3722 if (m_faceOffset[0] > -1) {
1467     #pragma omp for nowait
1468 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1469     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1470     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1471     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1472     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1473     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1474     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1475     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1476     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1477     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1478     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1479 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1480 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1481     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1482     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1483 caltinay 3764 } // end of component loop i
1484     } // end of k1 loop
1485     } // end of k2 loop
1486     } // end of face 0
1487 caltinay 3722 if (m_faceOffset[1] > -1) {
1488     #pragma omp for nowait
1489 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1490     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1491     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1492     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1493     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1494     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1495     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1496     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1500 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1501 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1502     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1503     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1504 caltinay 3764 } // end of component loop i
1505     } // end of k1 loop
1506     } // end of k2 loop
1507     } // end of face 1
1508 caltinay 3722 if (m_faceOffset[2] > -1) {
1509     #pragma omp for nowait
1510 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1511     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1512     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1517     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1520     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1521 caltinay 3722 for (index_t i=0; i < numComp; ++i) {