/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4626 - (hide annotations)
Wed Jan 22 06:07:34 2014 UTC (5 years, 2 months ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 146643 byte(s)
Eliminated all const_cast<Data*> hacks in ripley and finley now that
Data.getSampleDataRO returns a const pointer.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 caltinay 3691
16     #include <ripley/Brick.h>
17 caltinay 3791 #include <paso/SystemMatrix.h>
18 caltinay 4334 #include <esysUtils/esysFileWriter.h>
19 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
20 caltinay 4477 #include <boost/scoped_array.hpp>
21    
22 caltinay 4013 #ifdef USE_NETCDF
23     #include <netcdfcpp.h>
24     #endif
25    
26 caltinay 3691 #if USE_SILO
27     #include <silo.h>
28     #ifdef ESYS_MPI
29     #include <pmpio.h>
30     #endif
31     #endif
32    
33     #include <iomanip>
34    
35     using namespace std;
36 caltinay 4334 using esysUtils::FileWriter;
37 caltinay 3691
38     namespace ripley {
39    
40 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
41 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
42     const std::vector<double>& points, const std::vector<int>& tags,
43     const std::map<std::string, int>& tagnamestonums) :
44 caltinay 4334 RipleyDomain(3)
45 caltinay 3691 {
46 caltinay 3943 // ignore subdivision parameters for serial run
47     if (m_mpiInfo->size == 1) {
48     d0=1;
49     d1=1;
50     d2=1;
51     }
52     bool warn=false;
53     // if number of subdivisions is non-positive, try to subdivide by the same
54     // ratio as the number of elements
55     if (d0<=0 && d1<=0 && d2<=0) {
56     warn=true;
57 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
58 caltinay 4010 d0=max(1, d0);
59     d1=max(1, (int)(d0*n1/(float)n0));
60 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
61     if (d0*d1*d2 != m_mpiInfo->size) {
62     // ratios not the same so leave "smallest" side undivided and try
63     // dividing 2 sides only
64     if (n0>=n1) {
65     if (n1>=n2) {
66     d0=d1=0;
67     d2=1;
68     } else {
69     d0=d2=0;
70     d1=1;
71     }
72     } else {
73     if (n0>=n2) {
74     d0=d1=0;
75     d2=1;
76     } else {
77     d0=1;
78     d1=d2=0;
79     }
80     }
81     }
82     }
83     if (d0<=0 && d1<=0) {
84     warn=true;
85 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
86 caltinay 3943 d1=m_mpiInfo->size/d0;
87     if (d0*d1*d2 != m_mpiInfo->size) {
88     // ratios not the same so subdivide side with more elements only
89     if (n0>n1) {
90 caltinay 3944 d0=0;
91 caltinay 3943 d1=1;
92     } else {
93     d0=1;
94 caltinay 3944 d1=0;
95 caltinay 3943 }
96     }
97     } else if (d0<=0 && d2<=0) {
98     warn=true;
99 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
100 caltinay 3943 d2=m_mpiInfo->size/d0;
101     if (d0*d1*d2 != m_mpiInfo->size) {
102     // ratios not the same so subdivide side with more elements only
103     if (n0>n2) {
104 caltinay 3944 d0=0;
105 caltinay 3943 d2=1;
106     } else {
107     d0=1;
108 caltinay 3944 d2=0;
109 caltinay 3943 }
110     }
111     } else if (d1<=0 && d2<=0) {
112     warn=true;
113 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
114 caltinay 3943 d2=m_mpiInfo->size/d1;
115     if (d0*d1*d2 != m_mpiInfo->size) {
116     // ratios not the same so subdivide side with more elements only
117     if (n1>n2) {
118 caltinay 3944 d1=0;
119 caltinay 3943 d2=1;
120     } else {
121     d1=1;
122 caltinay 3944 d2=0;
123 caltinay 3943 }
124     }
125     }
126     if (d0<=0) {
127     // d1,d2 are preset, determine d0
128     d0=m_mpiInfo->size/(d1*d2);
129     } else if (d1<=0) {
130     // d0,d2 are preset, determine d1
131     d1=m_mpiInfo->size/(d0*d2);
132     } else if (d2<=0) {
133     // d0,d1 are preset, determine d2
134     d2=m_mpiInfo->size/(d0*d1);
135     }
136    
137 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
138     // among number of ranks
139 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
140 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
141    
142 caltinay 3943 if (warn) {
143     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
144     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
145     }
146 caltinay 3691
147 caltinay 4334 double l0 = x1-x0;
148     double l1 = y1-y0;
149     double l2 = z1-z0;
150     m_dx[0] = l0/n0;
151     m_dx[1] = l1/n1;
152     m_dx[2] = l2/n2;
153    
154     if ((n0+1)%d0 > 0) {
155 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
156 caltinay 4334 l0=m_dx[0]*n0;
157 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
158 caltinay 4334 << n0 << ", l0=" << l0 << endl;
159 caltinay 3943 }
160 caltinay 4334 if ((n1+1)%d1 > 0) {
161 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
162 caltinay 4334 l1=m_dx[1]*n1;
163 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
164 caltinay 4334 << n1 << ", l1=" << l1 << endl;
165 caltinay 3943 }
166 caltinay 4334 if ((n2+1)%d2 > 0) {
167 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
168 caltinay 4334 l2=m_dx[2]*n2;
169 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
170 caltinay 4334 << n2 << ", l2=" << l2 << endl;
171 caltinay 3943 }
172    
173 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
174 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
175    
176 caltinay 4334 m_gNE[0] = n0;
177     m_gNE[1] = n1;
178     m_gNE[2] = n2;
179     m_origin[0] = x0;
180     m_origin[1] = y0;
181     m_origin[2] = z0;
182     m_length[0] = l0;
183     m_length[1] = l1;
184     m_length[2] = l2;
185     m_NX[0] = d0;
186     m_NX[1] = d1;
187     m_NX[2] = d2;
188    
189 caltinay 3753 // local number of elements (including overlap)
190 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
191     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
192     m_NE[0]++;
193     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
194     m_ownNE[0]--;
195 caltinay 3764
196 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
197     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
198     m_NE[1]++;
199     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
200     m_ownNE[1]--;
201 caltinay 3764
202 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
203     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
204     m_NE[2]++;
205     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
206     m_ownNE[2]--;
207 caltinay 3753
208     // local number of nodes
209 caltinay 4334 m_NN[0] = m_NE[0]+1;
210     m_NN[1] = m_NE[1]+1;
211     m_NN[2] = m_NE[2]+1;
212 caltinay 3753
213 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
214 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
215     if (m_offset[0] > 0)
216     m_offset[0]--;
217     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
218     if (m_offset[1] > 0)
219     m_offset[1]--;
220     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
221     if (m_offset[2] > 0)
222     m_offset[2]--;
223 caltinay 3753
224 caltinay 3691 populateSampleIds();
225 caltinay 3756 createPattern();
226 sshaw 4622
227     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
228     addPoints(tags.size(), &points[0], &tags[0]);
229 caltinay 3691 }
230    
231    
232     Brick::~Brick()
233     {
234 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
235     Paso_Connector_free(m_connector);
236 sshaw 4622 delete assembler;
237 caltinay 3691 }
238    
239     string Brick::getDescription() const
240     {
241     return "ripley::Brick";
242     }
243    
244     bool Brick::operator==(const AbstractDomain& other) const
245     {
246 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
247     if (o) {
248     return (RipleyDomain::operator==(other) &&
249 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
250     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
251     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
252     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
253 caltinay 3744 }
254 caltinay 3691
255     return false;
256     }
257    
258 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
259 caltinay 4618 const ReaderParameters& params) const
260 caltinay 4013 {
261     #ifdef USE_NETCDF
262     // check destination function space
263     int myN0, myN1, myN2;
264     if (out.getFunctionSpace().getTypeCode() == Nodes) {
265 caltinay 4334 myN0 = m_NN[0];
266     myN1 = m_NN[1];
267     myN2 = m_NN[2];
268 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
269     out.getFunctionSpace().getTypeCode() == ReducedElements) {
270 caltinay 4334 myN0 = m_NE[0];
271     myN1 = m_NE[1];
272     myN2 = m_NE[2];
273 caltinay 4013 } else
274     throw RipleyException("readNcGrid(): invalid function space for output data object");
275    
276 caltinay 4615 if (params.first.size() != 3)
277 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
278    
279 caltinay 4615 if (params.numValues.size() != 3)
280 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
281    
282 caltinay 4615 if (params.multiplier.size() != 3)
283 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
284 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
285     if (params.multiplier[i]<1)
286 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
287    
288 caltinay 4013 // check file existence and size
289     NcFile f(filename.c_str(), NcFile::ReadOnly);
290     if (!f.is_valid())
291     throw RipleyException("readNcGrid(): cannot open file");
292    
293     NcVar* var = f.get_var(varname.c_str());
294     if (!var)
295     throw RipleyException("readNcGrid(): invalid variable name");
296    
297     // TODO: rank>0 data support
298     const int numComp = out.getDataPointSize();
299     if (numComp > 1)
300     throw RipleyException("readNcGrid(): only scalar data supported");
301    
302     const int dims = var->num_dims();
303 caltinay 4477 boost::scoped_array<long> edges(var->edges());
304 caltinay 4013
305     // is this a slice of the data object (dims!=3)?
306     // note the expected ordering of edges (as in numpy: z,y,x)
307 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
308     params.numValues[1] > edges[1] ||
309     params.numValues[0] > edges[2]))
310     || (dims==2 && params.numValues[2]>1)
311     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
312 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
313     }
314    
315     // check if this rank contributes anything
316 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
317     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
318     params.first[1] >= m_offset[1]+myN1 ||
319     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
320     params.first[2] >= m_offset[2]+myN2 ||
321     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
322 caltinay 4013 return;
323     }
324    
325     // now determine how much this rank has to write
326    
327     // first coordinates in data object to write to
328 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
329     const int first1 = max(0, params.first[1]-m_offset[1]);
330     const int first2 = max(0, params.first[2]-m_offset[2]);
331 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
332     int idx0 = max(0, m_offset[0]-params.first[0]);
333     int idx1 = max(0, m_offset[1]-params.first[1]);
334     int idx2 = max(0, m_offset[2]-params.first[2]);
335 caltinay 4277 // number of values to read
336 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
337     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
338     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
339 caltinay 4013
340 caltinay 4618 // make sure we read the right block if going backwards through file
341     if (params.reverse[0])
342     idx0 = edges[dims-1]-num0-idx0;
343     if (dims>1 && params.reverse[1])
344     idx1 = edges[dims-2]-num1-idx1;
345     if (dims>2 && params.reverse[2])
346     idx2 = edges[dims-3]-num2-idx2;
347    
348    
349 caltinay 4013 vector<double> values(num0*num1*num2);
350     if (dims==3) {
351     var->set_cur(idx2, idx1, idx0);
352     var->get(&values[0], num2, num1, num0);
353     } else if (dims==2) {
354     var->set_cur(idx1, idx0);
355     var->get(&values[0], num1, num0);
356     } else {
357     var->set_cur(idx0);
358     var->get(&values[0], num0);
359     }
360    
361     const int dpp = out.getNumDataPointsPerSample();
362     out.requireWrite();
363    
364 caltinay 4618 // helpers for reversing
365     const int x0 = (params.reverse[0] ? num0-1 : 0);
366     const int x_mult = (params.reverse[0] ? -1 : 1);
367     const int y0 = (params.reverse[1] ? num1-1 : 0);
368     const int y_mult = (params.reverse[1] ? -1 : 1);
369     const int z0 = (params.reverse[2] ? num2-1 : 0);
370     const int z_mult = (params.reverse[2] ? -1 : 1);
371    
372 caltinay 4013 for (index_t z=0; z<num2; z++) {
373     for (index_t y=0; y<num1; y++) {
374     #pragma omp parallel for
375     for (index_t x=0; x<num0; x++) {
376 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
377     +(first1+y*params.multiplier[1])*myN0
378     +(first2+z*params.multiplier[2])*myN0*myN1;
379 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
380     +(y0+y_mult*y)*num0
381     +(x0+x_mult*x);
382 caltinay 4174 if (!isnan(values[srcIndex])) {
383 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
384     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
385     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
386 caltinay 4277 const int dataIndex = baseIndex+m0
387     +m1*myN0
388     +m2*myN0*myN1;
389     double* dest = out.getSampleDataRW(dataIndex);
390     for (index_t q=0; q<dpp; q++) {
391     *dest++ = values[srcIndex];
392     }
393     }
394     }
395 caltinay 4174 }
396 caltinay 4013 }
397     }
398     }
399     }
400     #else
401     throw RipleyException("readNcGrid(): not compiled with netCDF support");
402     #endif
403     }
404    
405 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
406 caltinay 4618 const ReaderParameters& params) const
407 caltinay 4334 {
408 caltinay 4495 // the mapping is not universally correct but should work on our
409     // supported platforms
410 caltinay 4615 switch (params.dataType) {
411 caltinay 4495 case DATATYPE_INT32:
412 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
413 caltinay 4495 break;
414     case DATATYPE_FLOAT32:
415 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
416 caltinay 4495 break;
417     case DATATYPE_FLOAT64:
418 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
419 caltinay 4495 break;
420     default:
421     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
422     }
423     }
424    
425     template<typename ValueType>
426     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
427 caltinay 4618 const ReaderParameters& params) const
428 caltinay 4495 {
429 caltinay 4334 // check destination function space
430     int myN0, myN1, myN2;
431     if (out.getFunctionSpace().getTypeCode() == Nodes) {
432     myN0 = m_NN[0];
433     myN1 = m_NN[1];
434     myN2 = m_NN[2];
435     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
436     out.getFunctionSpace().getTypeCode() == ReducedElements) {
437     myN0 = m_NE[0];
438     myN1 = m_NE[1];
439     myN2 = m_NE[2];
440     } else
441     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
442    
443 caltinay 4615 if (params.first.size() != 3)
444 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
445    
446 caltinay 4615 if (params.numValues.size() != 3)
447 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
448    
449 caltinay 4615 if (params.multiplier.size() != 3)
450 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
451 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
452     if (params.multiplier[i]<1)
453 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
454    
455     // check file existence and size
456     ifstream f(filename.c_str(), ifstream::binary);
457     if (f.fail()) {
458     throw RipleyException("readBinaryGrid(): cannot open file");
459     }
460     f.seekg(0, ios::end);
461     const int numComp = out.getDataPointSize();
462     const int filesize = f.tellg();
463 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
464 caltinay 4334 if (filesize < reqsize) {
465     f.close();
466     throw RipleyException("readBinaryGrid(): not enough data in file");
467     }
468    
469     // check if this rank contributes anything
470 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
471     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
472     params.first[1] >= m_offset[1]+myN1 ||
473     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
474     params.first[2] >= m_offset[2]+myN2 ||
475     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
476 caltinay 4334 f.close();
477     return;
478     }
479    
480     // now determine how much this rank has to write
481    
482     // first coordinates in data object to write to
483 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
484     const int first1 = max(0, params.first[1]-m_offset[1]);
485     const int first2 = max(0, params.first[2]-m_offset[2]);
486 caltinay 4334 // indices to first value in file
487 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
488     const int idx1 = max(0, m_offset[1]-params.first[1]);
489     const int idx2 = max(0, m_offset[2]-params.first[2]);
490 caltinay 4334 // number of values to read
491 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
492     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
493     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
494 caltinay 4334
495     out.requireWrite();
496 caltinay 4495 vector<ValueType> values(num0*numComp);
497 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
498    
499 caltinay 4529 for (int z=0; z<num2; z++) {
500     for (int y=0; y<num1; y++) {
501 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
502     +(idx2+z)*params.numValues[0]*params.numValues[1]);
503 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
504     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
505 caltinay 4334
506 caltinay 4529 for (int x=0; x<num0; x++) {
507 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
508     +(first1+y*params.multiplier[1])*myN0
509     +(first2+z*params.multiplier[2])*myN0*myN1;
510     for (int m2=0; m2<params.multiplier[2]; m2++) {
511     for (int m1=0; m1<params.multiplier[1]; m1++) {
512     for (int m0=0; m0<params.multiplier[0]; m0++) {
513 caltinay 4334 const int dataIndex = baseIndex+m0
514     +m1*myN0
515     +m2*myN0*myN1;
516     double* dest = out.getSampleDataRW(dataIndex);
517 caltinay 4529 for (int c=0; c<numComp; c++) {
518     ValueType val = values[x*numComp+c];
519    
520 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
521 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
522     // this will alter val!!
523     byte_swap32(cval);
524     }
525     if (!std::isnan(val)) {
526     for (int q=0; q<dpp; q++) {
527     *dest++ = static_cast<double>(val);
528 caltinay 4334 }
529     }
530     }
531     }
532     }
533     }
534     }
535     }
536     }
537    
538     f.close();
539     }
540    
541 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
542     int byteOrder, int dataType) const
543 caltinay 4334 {
544 caltinay 4357 // the mapping is not universally correct but should work on our
545     // supported platforms
546     switch (dataType) {
547     case DATATYPE_INT32:
548     writeBinaryGridImpl<int>(in, filename, byteOrder);
549     break;
550     case DATATYPE_FLOAT32:
551     writeBinaryGridImpl<float>(in, filename, byteOrder);
552     break;
553     case DATATYPE_FLOAT64:
554     writeBinaryGridImpl<double>(in, filename, byteOrder);
555     break;
556     default:
557     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
558     }
559     }
560    
561     template<typename ValueType>
562     void Brick::writeBinaryGridImpl(const escript::Data& in,
563     const string& filename, int byteOrder) const
564     {
565 caltinay 4334 // check function space and determine number of points
566     int myN0, myN1, myN2;
567     int totalN0, totalN1, totalN2;
568     if (in.getFunctionSpace().getTypeCode() == Nodes) {
569     myN0 = m_NN[0];
570     myN1 = m_NN[1];
571     myN2 = m_NN[2];
572     totalN0 = m_gNE[0]+1;
573     totalN1 = m_gNE[1]+1;
574     totalN2 = m_gNE[2]+1;
575     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
576     in.getFunctionSpace().getTypeCode() == ReducedElements) {
577     myN0 = m_NE[0];
578     myN1 = m_NE[1];
579     myN2 = m_NE[2];
580     totalN0 = m_gNE[0];
581     totalN1 = m_gNE[1];
582     totalN2 = m_gNE[2];
583     } else
584     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
585    
586     const int numComp = in.getDataPointSize();
587     const int dpp = in.getNumDataPointsPerSample();
588 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
589 caltinay 4334
590     if (numComp > 1 || dpp > 1)
591     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
592    
593     // from here on we know that each sample consists of one value
594 caltinay 4482 FileWriter fw;
595     fw.openFile(filename, fileSize);
596 caltinay 4334 MPIBarrier();
597    
598     for (index_t z=0; z<myN2; z++) {
599     for (index_t y=0; y<myN1; y++) {
600     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
601 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
602 caltinay 4334 ostringstream oss;
603    
604     for (index_t x=0; x<myN0; x++) {
605 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
606 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
607     if (byteOrder == BYTEORDER_NATIVE) {
608 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
609     } else {
610     char* value = reinterpret_cast<char*>(&fvalue);
611 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
612 caltinay 4334 }
613     }
614 caltinay 4482 fw.writeAt(oss, fileofs);
615 caltinay 4334 }
616     }
617 caltinay 4482 fw.close();
618 caltinay 4334 }
619    
620 caltinay 3691 void Brick::dump(const string& fileName) const
621     {
622     #if USE_SILO
623     string fn(fileName);
624     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
625     fn+=".silo";
626     }
627    
628     int driver=DB_HDF5;
629     string siloPath;
630     DBfile* dbfile = NULL;
631    
632     #ifdef ESYS_MPI
633     PMPIO_baton_t* baton = NULL;
634 gross 3793 const int NUM_SILO_FILES = 1;
635     const char* blockDirFmt = "/block%04d";
636 caltinay 3691 #endif
637    
638     if (m_mpiInfo->size > 1) {
639     #ifdef ESYS_MPI
640     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
641     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
642     PMPIO_DefaultClose, (void*)&driver);
643     // try the fallback driver in case of error
644     if (!baton && driver != DB_PDB) {
645     driver = DB_PDB;
646     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
647     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
648     PMPIO_DefaultClose, (void*)&driver);
649     }
650     if (baton) {
651     char str[64];
652     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
653     siloPath = str;
654     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
655     }
656     #endif
657     } else {
658     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
659     getDescription().c_str(), driver);
660     // try the fallback driver in case of error
661     if (!dbfile && driver != DB_PDB) {
662     driver = DB_PDB;
663     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
664     getDescription().c_str(), driver);
665     }
666     }
667    
668     if (!dbfile)
669     throw RipleyException("dump: Could not create Silo file");
670    
671     /*
672     if (driver==DB_HDF5) {
673     // gzip level 1 already provides good compression with minimal
674     // performance penalty. Some tests showed that gzip levels >3 performed
675     // rather badly on escript data both in terms of time and space
676     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
677     }
678     */
679    
680 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
681     boost::scoped_ptr<double> y(new double[m_NN[1]]);
682     boost::scoped_ptr<double> z(new double[m_NN[2]]);
683 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
684     #pragma omp parallel
685     {
686     #pragma omp for
687 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
688     coords[0][i0]=getLocalCoordinate(i0, 0);
689 caltinay 3691 }
690     #pragma omp for
691 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
692     coords[1][i1]=getLocalCoordinate(i1, 1);
693 caltinay 3691 }
694     #pragma omp for
695 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
696     coords[2][i2]=getLocalCoordinate(i2, 2);
697 caltinay 3691 }
698     }
699 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
700    
701     // write mesh
702     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
703 caltinay 3691 DB_COLLINEAR, NULL);
704    
705 caltinay 4334 // write node ids
706     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
707 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
708 caltinay 3691
709 caltinay 3698 // write element ids
710 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
711 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
712 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
713 caltinay 3698
714     // rank 0 writes multimesh and multivar
715 caltinay 3691 if (m_mpiInfo->rank == 0) {
716     vector<string> tempstrings;
717     vector<char*> names;
718     for (dim_t i=0; i<m_mpiInfo->size; i++) {
719     stringstream path;
720     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
721     tempstrings.push_back(path.str());
722     names.push_back((char*)tempstrings.back().c_str());
723     }
724     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
725     DBSetDir(dbfile, "/");
726     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
727     &types[0], NULL);
728     tempstrings.clear();
729     names.clear();
730     for (dim_t i=0; i<m_mpiInfo->size; i++) {
731     stringstream path;
732     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
733     tempstrings.push_back(path.str());
734     names.push_back((char*)tempstrings.back().c_str());
735     }
736     types.assign(m_mpiInfo->size, DB_QUADVAR);
737     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
738     &types[0], NULL);
739 caltinay 3698 tempstrings.clear();
740     names.clear();
741     for (dim_t i=0; i<m_mpiInfo->size; i++) {
742     stringstream path;
743     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
744     tempstrings.push_back(path.str());
745     names.push_back((char*)tempstrings.back().c_str());
746     }
747     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
748     &types[0], NULL);
749 caltinay 3691 }
750    
751     if (m_mpiInfo->size > 1) {
752     #ifdef ESYS_MPI
753     PMPIO_HandOffBaton(baton, dbfile);
754     PMPIO_Finish(baton);
755     #endif
756     } else {
757     DBClose(dbfile);
758     }
759    
760     #else // USE_SILO
761 caltinay 3791 throw RipleyException("dump: no Silo support");
762 caltinay 3691 #endif
763     }
764    
765     const int* Brick::borrowSampleReferenceIDs(int fsType) const
766     {
767 caltinay 3697 switch (fsType) {
768     case Nodes:
769 caltinay 3748 case ReducedNodes: //FIXME: reduced
770 caltinay 3753 return &m_nodeId[0];
771 caltinay 3757 case DegreesOfFreedom:
772     case ReducedDegreesOfFreedom: //FIXME: reduced
773 caltinay 3753 return &m_dofId[0];
774 caltinay 3697 case Elements:
775 caltinay 3733 case ReducedElements:
776 caltinay 3697 return &m_elementId[0];
777 caltinay 3757 case FaceElements:
778 caltinay 3733 case ReducedFaceElements:
779 caltinay 3697 return &m_faceId[0];
780     default:
781     break;
782     }
783 caltinay 3691
784 caltinay 3697 stringstream msg;
785 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
786 caltinay 3697 throw RipleyException(msg.str());
787 caltinay 3691 }
788    
789 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
790 caltinay 3691 {
791 caltinay 3759 if (getMPISize()==1)
792     return true;
793    
794 caltinay 3757 switch (fsType) {
795     case Nodes:
796     case ReducedNodes: //FIXME: reduced
797     return (m_dofMap[id] < getNumDOF());
798     case DegreesOfFreedom:
799     case ReducedDegreesOfFreedom:
800     return true;
801     case Elements:
802     case ReducedElements:
803     {
804     // check ownership of element's _last_ node
805 caltinay 4334 const index_t x=id%m_NE[0] + 1;
806     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
807     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
808     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
809 caltinay 3757 }
810     case FaceElements:
811     case ReducedFaceElements:
812 caltinay 3759 {
813     // check ownership of face element's last node
814     dim_t n=0;
815 caltinay 4334 for (size_t i=0; i<6; i++) {
816     n+=m_faceCount[i];
817 caltinay 3759 if (id<n) {
818 caltinay 4334 const index_t j=id-n+m_faceCount[i];
819 caltinay 3759 if (i>=4) { // front or back
820 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
821     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
822 caltinay 3759 } else if (i>=2) { // bottom or top
823 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
824     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
825 caltinay 3759 } else { // left or right
826 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
827     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
828 caltinay 3759 }
829     }
830     }
831     return false;
832     }
833 caltinay 3757 default:
834     break;
835 caltinay 3753 }
836 caltinay 3757
837     stringstream msg;
838 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
839 caltinay 3757 throw RipleyException(msg.str());
840 caltinay 3691 }
841    
842 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
843 caltinay 3703 {
844 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
845     out.requireWrite();
846     #pragma omp parallel
847     {
848     if (m_faceOffset[0] > -1) {
849     #pragma omp for nowait
850 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
851     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
852     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
853 caltinay 3764 // set vector at four quadrature points
854     *o++ = -1.; *o++ = 0.; *o++ = 0.;
855     *o++ = -1.; *o++ = 0.; *o++ = 0.;
856     *o++ = -1.; *o++ = 0.; *o++ = 0.;
857     *o++ = -1.; *o++ = 0.; *o = 0.;
858     }
859     }
860     }
861    
862     if (m_faceOffset[1] > -1) {
863     #pragma omp for nowait
864 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
865     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
866     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
867 caltinay 3764 // set vector at four quadrature points
868     *o++ = 1.; *o++ = 0.; *o++ = 0.;
869     *o++ = 1.; *o++ = 0.; *o++ = 0.;
870     *o++ = 1.; *o++ = 0.; *o++ = 0.;
871     *o++ = 1.; *o++ = 0.; *o = 0.;
872     }
873     }
874     }
875    
876     if (m_faceOffset[2] > -1) {
877     #pragma omp for nowait
878 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
879     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
880     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
881 caltinay 3764 // set vector at four quadrature points
882     *o++ = 0.; *o++ = -1.; *o++ = 0.;
883     *o++ = 0.; *o++ = -1.; *o++ = 0.;
884     *o++ = 0.; *o++ = -1.; *o++ = 0.;
885     *o++ = 0.; *o++ = -1.; *o = 0.;
886     }
887     }
888     }
889    
890     if (m_faceOffset[3] > -1) {
891     #pragma omp for nowait
892 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
893     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
894     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
895 caltinay 3764 // set vector at four quadrature points
896     *o++ = 0.; *o++ = 1.; *o++ = 0.;
897     *o++ = 0.; *o++ = 1.; *o++ = 0.;
898     *o++ = 0.; *o++ = 1.; *o++ = 0.;
899     *o++ = 0.; *o++ = 1.; *o = 0.;
900     }
901     }
902     }
903    
904     if (m_faceOffset[4] > -1) {
905     #pragma omp for nowait
906 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
907     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
908     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
909 caltinay 3764 // set vector at four quadrature points
910     *o++ = 0.; *o++ = 0.; *o++ = -1.;
911     *o++ = 0.; *o++ = 0.; *o++ = -1.;
912     *o++ = 0.; *o++ = 0.; *o++ = -1.;
913     *o++ = 0.; *o++ = 0.; *o = -1.;
914     }
915     }
916     }
917    
918     if (m_faceOffset[5] > -1) {
919     #pragma omp for nowait
920 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
921     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
922     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
923 caltinay 3764 // set vector at four quadrature points
924     *o++ = 0.; *o++ = 0.; *o++ = 1.;
925     *o++ = 0.; *o++ = 0.; *o++ = 1.;
926     *o++ = 0.; *o++ = 0.; *o++ = 1.;
927     *o++ = 0.; *o++ = 0.; *o = 1.;
928     }
929     }
930     }
931     } // end of parallel section
932     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
933     out.requireWrite();
934     #pragma omp parallel
935     {
936     if (m_faceOffset[0] > -1) {
937     #pragma omp for nowait
938 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
939     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
940     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
941 caltinay 3764 *o++ = -1.;
942     *o++ = 0.;
943     *o = 0.;
944     }
945     }
946     }
947    
948     if (m_faceOffset[1] > -1) {
949     #pragma omp for nowait
950 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
951     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
952     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
953 caltinay 3764 *o++ = 1.;
954     *o++ = 0.;
955     *o = 0.;
956     }
957     }
958     }
959    
960     if (m_faceOffset[2] > -1) {
961     #pragma omp for nowait
962 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
963     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
964     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
965 caltinay 3764 *o++ = 0.;
966     *o++ = -1.;
967     *o = 0.;
968     }
969     }
970     }
971    
972     if (m_faceOffset[3] > -1) {
973     #pragma omp for nowait
974 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
976     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
977 caltinay 3764 *o++ = 0.;
978     *o++ = 1.;
979     *o = 0.;
980     }
981     }
982     }
983    
984     if (m_faceOffset[4] > -1) {
985     #pragma omp for nowait
986 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
987     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
988     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
989 caltinay 3764 *o++ = 0.;
990     *o++ = 0.;
991     *o = -1.;
992     }
993     }
994     }
995    
996     if (m_faceOffset[5] > -1) {
997     #pragma omp for nowait
998 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
999     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1000     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1001 caltinay 3764 *o++ = 0.;
1002     *o++ = 0.;
1003     *o = 1.;
1004     }
1005     }
1006     }
1007     } // end of parallel section
1008    
1009     } else {
1010     stringstream msg;
1011 caltinay 3791 msg << "setToNormal: invalid function space type "
1012     << out.getFunctionSpace().getTypeCode();
1013 caltinay 3764 throw RipleyException(msg.str());
1014     }
1015     }
1016    
1017     void Brick::setToSize(escript::Data& out) const
1018     {
1019     if (out.getFunctionSpace().getTypeCode() == Elements
1020     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1021     out.requireWrite();
1022     const dim_t numQuad=out.getNumDataPointsPerSample();
1023 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1024 caltinay 3764 #pragma omp parallel for
1025     for (index_t k = 0; k < getNumElements(); ++k) {
1026     double* o = out.getSampleDataRW(k);
1027     fill(o, o+numQuad, size);
1028     }
1029     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1030     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1031     out.requireWrite();
1032     const dim_t numQuad=out.getNumDataPointsPerSample();
1033     #pragma omp parallel
1034     {
1035     if (m_faceOffset[0] > -1) {
1036 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1037 caltinay 3764 #pragma omp for nowait
1038 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1039     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1040     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1041 caltinay 3764 fill(o, o+numQuad, size);
1042     }
1043     }
1044     }
1045    
1046     if (m_faceOffset[1] > -1) {
1047 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1048 caltinay 3764 #pragma omp for nowait
1049 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1050     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1051     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1052 caltinay 3764 fill(o, o+numQuad, size);
1053     }
1054     }
1055     }
1056    
1057     if (m_faceOffset[2] > -1) {
1058 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1059 caltinay 3764 #pragma omp for nowait
1060 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1061     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1062     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1063 caltinay 3764 fill(o, o+numQuad, size);
1064     }
1065     }
1066     }
1067    
1068     if (m_faceOffset[3] > -1) {
1069 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1070 caltinay 3764 #pragma omp for nowait
1071 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1072     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1073     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1074 caltinay 3764 fill(o, o+numQuad, size);
1075     }
1076     }
1077     }
1078    
1079     if (m_faceOffset[4] > -1) {
1080 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1081 caltinay 3764 #pragma omp for nowait
1082 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1083     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1084     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1085 caltinay 3764 fill(o, o+numQuad, size);
1086     }
1087     }
1088     }
1089    
1090     if (m_faceOffset[5] > -1) {
1091 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1092 caltinay 3764 #pragma omp for nowait
1093 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1094     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1095     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1096 caltinay 3764 fill(o, o+numQuad, size);
1097     }
1098     }
1099     }
1100     } // end of parallel section
1101    
1102     } else {
1103     stringstream msg;
1104 caltinay 3791 msg << "setToSize: invalid function space type "
1105     << out.getFunctionSpace().getTypeCode();
1106 caltinay 3764 throw RipleyException(msg.str());
1107     }
1108     }
1109    
1110     void Brick::Print_Mesh_Info(const bool full) const
1111     {
1112     RipleyDomain::Print_Mesh_Info(full);
1113     if (full) {
1114     cout << " Id Coordinates" << endl;
1115     cout.precision(15);
1116     cout.setf(ios::scientific, ios::floatfield);
1117     for (index_t i=0; i < getNumNodes(); i++) {
1118     cout << " " << setw(5) << m_nodeId[i]
1119 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1120     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1121     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1122 caltinay 3764 }
1123     }
1124     }
1125    
1126    
1127     //protected
1128     void Brick::assembleCoordinates(escript::Data& arg) const
1129     {
1130     escriptDataC x = arg.getDataC();
1131     int numDim = m_numDim;
1132     if (!isDataPointShapeEqual(&x, 1, &numDim))
1133     throw RipleyException("setToX: Invalid Data object shape");
1134     if (!numSamplesEqual(&x, 1, getNumNodes()))
1135     throw RipleyException("setToX: Illegal number of samples in Data object");
1136    
1137     arg.requireWrite();
1138     #pragma omp parallel for
1139 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1140     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1141     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1142     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1143     point[0] = getLocalCoordinate(i0, 0);
1144     point[1] = getLocalCoordinate(i1, 1);
1145     point[2] = getLocalCoordinate(i2, 2);
1146 caltinay 3764 }
1147     }
1148     }
1149     }
1150    
1151     //protected
1152 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1153 caltinay 3764 {
1154 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1155 caltinay 3731 const double C0 = .044658198738520451079;
1156     const double C1 = .16666666666666666667;
1157     const double C2 = .21132486540518711775;
1158     const double C3 = .25;
1159     const double C4 = .5;
1160     const double C5 = .62200846792814621559;
1161     const double C6 = .78867513459481288225;
1162    
1163 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1164 caltinay 3760 out.requireWrite();
1165 caltinay 3913 #pragma omp parallel
1166     {
1167     vector<double> f_000(numComp);
1168     vector<double> f_001(numComp);
1169     vector<double> f_010(numComp);
1170     vector<double> f_011(numComp);
1171     vector<double> f_100(numComp);
1172     vector<double> f_101(numComp);
1173     vector<double> f_110(numComp);
1174     vector<double> f_111(numComp);
1175     #pragma omp for
1176 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1177     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1178     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1179     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1180     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1181     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1182     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1183     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1184     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1185     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1186     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1187     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1188 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1189 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1190     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1191     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1192     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1193     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1194     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1195     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1196     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1197     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1198     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1199     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1200     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1201 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1202     o[INDEX3(i,1,0,numComp,3)] = V4;
1203     o[INDEX3(i,2,0,numComp,3)] = V8;
1204     o[INDEX3(i,0,1,numComp,3)] = V0;
1205     o[INDEX3(i,1,1,numComp,3)] = V5;
1206     o[INDEX3(i,2,1,numComp,3)] = V9;
1207     o[INDEX3(i,0,2,numComp,3)] = V1;
1208     o[INDEX3(i,1,2,numComp,3)] = V4;
1209     o[INDEX3(i,2,2,numComp,3)] = V10;
1210     o[INDEX3(i,0,3,numComp,3)] = V1;
1211     o[INDEX3(i,1,3,numComp,3)] = V5;
1212     o[INDEX3(i,2,3,numComp,3)] = V11;
1213     o[INDEX3(i,0,4,numComp,3)] = V2;
1214     o[INDEX3(i,1,4,numComp,3)] = V6;
1215     o[INDEX3(i,2,4,numComp,3)] = V8;
1216     o[INDEX3(i,0,5,numComp,3)] = V2;
1217     o[INDEX3(i,1,5,numComp,3)] = V7;
1218     o[INDEX3(i,2,5,numComp,3)] = V9;
1219     o[INDEX3(i,0,6,numComp,3)] = V3;
1220     o[INDEX3(i,1,6,numComp,3)] = V6;
1221     o[INDEX3(i,2,6,numComp,3)] = V10;
1222     o[INDEX3(i,0,7,numComp,3)] = V3;
1223     o[INDEX3(i,1,7,numComp,3)] = V7;
1224     o[INDEX3(i,2,7,numComp,3)] = V11;
1225     } // end of component loop i
1226     } // end of k0 loop
1227     } // end of k1 loop
1228     } // end of k2 loop
1229     } // end of parallel section
1230 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1231 caltinay 3760 out.requireWrite();
1232 caltinay 3913 #pragma omp parallel
1233     {
1234     vector<double> f_000(numComp);
1235     vector<double> f_001(numComp);
1236     vector<double> f_010(numComp);
1237     vector<double> f_011(numComp);
1238     vector<double> f_100(numComp);
1239     vector<double> f_101(numComp);
1240     vector<double> f_110(numComp);
1241     vector<double> f_111(numComp);
1242     #pragma omp for
1243 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1244     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1245     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1246     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1247     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1248     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1249     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1250     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1251     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1252     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1253     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1254     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1255 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1256 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1257     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1258     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1259 caltinay 3913 } // end of component loop i
1260     } // end of k0 loop
1261     } // end of k1 loop
1262     } // end of k2 loop
1263     } // end of parallel section
1264 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1265 caltinay 3760 out.requireWrite();
1266 caltinay 3722 #pragma omp parallel
1267     {
1268 caltinay 3913 vector<double> f_000(numComp);
1269     vector<double> f_001(numComp);
1270     vector<double> f_010(numComp);
1271     vector<double> f_011(numComp);
1272     vector<double> f_100(numComp);
1273     vector<double> f_101(numComp);
1274     vector<double> f_110(numComp);
1275     vector<double> f_111(numComp);
1276 caltinay 3722 if (m_faceOffset[0] > -1) {
1277     #pragma omp for nowait
1278 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1279     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1280     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1281     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1282     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1283     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1284     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1285     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1286     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1287     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1288     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1289 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1290 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1291     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1292     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1293     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1294     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1295 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1296     o[INDEX3(i,2,0,numComp,3)] = V2;
1297 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1298 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1299     o[INDEX3(i,2,1,numComp,3)] = V3;
1300 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1301 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1302     o[INDEX3(i,2,2,numComp,3)] = V2;
1303 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1305     o[INDEX3(i,2,3,numComp,3)] = V3;
1306 caltinay 3764 } // end of component loop i
1307     } // end of k1 loop
1308     } // end of k2 loop
1309     } // end of face 0
1310 caltinay 3722 if (m_faceOffset[1] > -1) {
1311     #pragma omp for nowait
1312 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1313     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1314     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1315     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1316     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1317     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1318     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1319     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1320     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1321     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1322     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1323 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1324 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1325     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1326     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1327     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1328     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1329 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1330     o[INDEX3(i,2,0,numComp,3)] = V2;
1331 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1332 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1333     o[INDEX3(i,2,1,numComp,3)] = V3;
1334 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1335 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1336     o[INDEX3(i,2,2,numComp,3)] = V2;
1337 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1338 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1339     o[INDEX3(i,2,3,numComp,3)] = V3;
1340 caltinay 3764 } // end of component loop i
1341     } // end of k1 loop
1342     } // end of k2 loop
1343     } // end of face 1
1344 caltinay 3722 if (m_faceOffset[2] > -1) {
1345     #pragma omp for nowait
1346 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1347     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1348     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1349     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1350     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1351     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1352     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1353     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1354     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1355     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1357 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1358 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1359     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1360     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1361 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1362 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1363 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1364     o[INDEX3(i,0,1,numComp,3)] = V0;
1365 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1366 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1367     o[INDEX3(i,0,2,numComp,3)] = V0;
1368 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1369 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1370     o[INDEX3(i,0,3,numComp,3)] = V0;
1371 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1372 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1373 caltinay 3764 } // end of component loop i
1374     } // end of k0 loop
1375     } // end of k2 loop
1376     } // end of face 2
1377 caltinay 3722 if (m_faceOffset[3] > -1) {
1378     #pragma omp for nowait
1379 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1380     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1381     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1382     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1383     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1384     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1385     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1386     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1387     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1388     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1389     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1390 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1391 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1392     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1393     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1394     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1395 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1396 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1397 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1398     o[INDEX3(i,0,1,numComp,3)] = V0;
1399 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1400 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1401     o[INDEX3(i,0,2,numComp,3)] = V1;
1402 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1403 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1404     o[INDEX3(i,0,3,numComp,3)] = V1;
1405 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1406 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1407 caltinay 3764 } // end of component loop i
1408     } // end of k0 loop
1409     } // end of k2 loop
1410     } // end of face 3
1411 caltinay 3722 if (m_faceOffset[4] > -1) {
1412     #pragma omp for nowait
1413 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1414     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1415     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1416     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1417     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1418     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1419     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1420     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1421     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1422     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1423     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1424 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1425 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1426     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1427     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1428     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1429 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1430     o[INDEX3(i,1,0,numComp,3)] = V2;
1431 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1432 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1433     o[INDEX3(i,1,1,numComp,3)] = V3;
1434 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1435 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1436     o[INDEX3(i,1,2,numComp,3)] = V2;
1437 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1438 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1439     o[INDEX3(i,1,3,numComp,3)] = V3;
1440 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1441 caltinay 3764 } // end of component loop i
1442     } // end of k0 loop
1443     } // end of k1 loop
1444     } // end of face 4
1445 caltinay 3722 if (m_faceOffset[5] > -1) {
1446     #pragma omp for nowait
1447 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1448     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1449     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1450     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1451     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1452     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1453     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1454     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1455     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1456     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1457     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1458 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1459 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1460     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1461     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1462     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1463 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1464     o[INDEX3(i,1,0,numComp,3)] = V2;
1465 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1466 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1467     o[INDEX3(i,1,1,numComp,3)] = V3;
1468 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1469 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1470     o[INDEX3(i,1,2,numComp,3)] = V2;
1471 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1472 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1473     o[INDEX3(i,1,3,numComp,3)] = V3;
1474 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1475 caltinay 3764 } // end of component loop i
1476     } // end of k0 loop
1477     } // end of k1 loop
1478     } // end of face 5
1479 caltinay 3722 } // end of parallel section
1480 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1481 caltinay 3760 out.requireWrite();
1482 caltinay 3722 #pragma omp parallel
1483     {
1484 caltinay 3913 vector<double> f_000(numComp);
1485     vector<double> f_001(numComp);
1486     vector<double> f_010(numComp);
1487     vector<double> f_011(numComp);
1488     vector<double> f_100(numComp);
1489     vector<double> f_101(numComp);
1490     vector<double> f_110(numComp);
1491     vector<double> f_111(numComp);
1492 caltinay 3722 if (m_faceOffset[0] > -1) {
1493     #pragma omp for nowait
1494 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1495     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1496     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1497     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1498     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1499     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1500     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1501     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1505 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1506 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1507     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1508     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1509 caltinay 3764 } // end of component loop i
1510     } // end of k1 loop
1511     } // end of k2 loop
1512     } // end of face 0
1513 caltinay 3722 if (m_faceOffset[1] > -1) {
1514     #pragma omp for nowait
1515 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1516     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1517     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1518     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1519     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1520     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1521     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1522     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1523     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1524     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1525     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1526 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1527 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1528     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1529     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1530 caltinay 3764 } // end of component loop i
1531     } // end of k1 loop
1532     } // end of k2 loop
1533     } // end of face 1
1534 caltinay 3722 if (m_faceOffset[2] > -1) {
1535     #pragma omp for nowait
1536 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1537     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1538     memcpy(&f_000[0], in.getSampleDataRO(INDEX3