/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4696 - (hide annotations)
Wed Feb 19 07:29:50 2014 UTC (5 years, 1 month ago) by jfenwick
Original Path: trunk/ripley/src/Brick.cpp
File size: 159184 byte(s)
Correcting python libname for sage.
Fixing bug where the number of values in the shape was not considered for buffer and message size.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 caltinay 4477 #include <boost/scoped_array.hpp>
23    
24 caltinay 4013 #ifdef USE_NETCDF
25     #include <netcdfcpp.h>
26     #endif
27    
28 caltinay 3691 #if USE_SILO
29     #include <silo.h>
30     #ifdef ESYS_MPI
31     #include <pmpio.h>
32     #endif
33     #endif
34    
35     #include <iomanip>
36    
37 jfenwick 4650 #include "esysUtils/EsysRandom.h"
38     #include "blocktools.h"
39    
40    
41 caltinay 3691 using namespace std;
42 caltinay 4334 using esysUtils::FileWriter;
43 caltinay 3691
44     namespace ripley {
45    
46 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
47 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
48     const std::vector<double>& points, const std::vector<int>& tags,
49 jfenwick 4638 const simap_t& tagnamestonums) :
50 caltinay 4334 RipleyDomain(3)
51 caltinay 3691 {
52 caltinay 3943 // ignore subdivision parameters for serial run
53     if (m_mpiInfo->size == 1) {
54     d0=1;
55     d1=1;
56     d2=1;
57     }
58     bool warn=false;
59     // if number of subdivisions is non-positive, try to subdivide by the same
60     // ratio as the number of elements
61     if (d0<=0 && d1<=0 && d2<=0) {
62     warn=true;
63 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
64 caltinay 4010 d0=max(1, d0);
65     d1=max(1, (int)(d0*n1/(float)n0));
66 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
67     if (d0*d1*d2 != m_mpiInfo->size) {
68     // ratios not the same so leave "smallest" side undivided and try
69     // dividing 2 sides only
70     if (n0>=n1) {
71     if (n1>=n2) {
72     d0=d1=0;
73     d2=1;
74     } else {
75     d0=d2=0;
76     d1=1;
77     }
78     } else {
79     if (n0>=n2) {
80     d0=d1=0;
81     d2=1;
82     } else {
83     d0=1;
84     d1=d2=0;
85     }
86     }
87     }
88     }
89     if (d0<=0 && d1<=0) {
90     warn=true;
91 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
92 caltinay 3943 d1=m_mpiInfo->size/d0;
93     if (d0*d1*d2 != m_mpiInfo->size) {
94     // ratios not the same so subdivide side with more elements only
95     if (n0>n1) {
96 caltinay 3944 d0=0;
97 caltinay 3943 d1=1;
98     } else {
99     d0=1;
100 caltinay 3944 d1=0;
101 caltinay 3943 }
102     }
103     } else if (d0<=0 && d2<=0) {
104     warn=true;
105 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
106 caltinay 3943 d2=m_mpiInfo->size/d0;
107     if (d0*d1*d2 != m_mpiInfo->size) {
108     // ratios not the same so subdivide side with more elements only
109     if (n0>n2) {
110 caltinay 3944 d0=0;
111 caltinay 3943 d2=1;
112     } else {
113     d0=1;
114 caltinay 3944 d2=0;
115 caltinay 3943 }
116     }
117     } else if (d1<=0 && d2<=0) {
118     warn=true;
119 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
120 caltinay 3943 d2=m_mpiInfo->size/d1;
121     if (d0*d1*d2 != m_mpiInfo->size) {
122     // ratios not the same so subdivide side with more elements only
123     if (n1>n2) {
124 caltinay 3944 d1=0;
125 caltinay 3943 d2=1;
126     } else {
127     d1=1;
128 caltinay 3944 d2=0;
129 caltinay 3943 }
130     }
131     }
132     if (d0<=0) {
133     // d1,d2 are preset, determine d0
134     d0=m_mpiInfo->size/(d1*d2);
135     } else if (d1<=0) {
136     // d0,d2 are preset, determine d1
137     d1=m_mpiInfo->size/(d0*d2);
138     } else if (d2<=0) {
139     // d0,d1 are preset, determine d2
140     d2=m_mpiInfo->size/(d0*d1);
141     }
142    
143 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
144     // among number of ranks
145 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
146 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
147    
148 caltinay 3943 if (warn) {
149     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
150     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
151     }
152 caltinay 3691
153 caltinay 4334 double l0 = x1-x0;
154     double l1 = y1-y0;
155     double l2 = z1-z0;
156     m_dx[0] = l0/n0;
157     m_dx[1] = l1/n1;
158     m_dx[2] = l2/n2;
159    
160     if ((n0+1)%d0 > 0) {
161 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
162 caltinay 4334 l0=m_dx[0]*n0;
163 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
164 caltinay 4334 << n0 << ", l0=" << l0 << endl;
165 caltinay 3943 }
166 caltinay 4334 if ((n1+1)%d1 > 0) {
167 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
168 caltinay 4334 l1=m_dx[1]*n1;
169 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
170 caltinay 4334 << n1 << ", l1=" << l1 << endl;
171 caltinay 3943 }
172 caltinay 4334 if ((n2+1)%d2 > 0) {
173 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
174 caltinay 4334 l2=m_dx[2]*n2;
175 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
176 caltinay 4334 << n2 << ", l2=" << l2 << endl;
177 caltinay 3943 }
178    
179 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
180 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
181    
182 caltinay 4334 m_gNE[0] = n0;
183     m_gNE[1] = n1;
184     m_gNE[2] = n2;
185     m_origin[0] = x0;
186     m_origin[1] = y0;
187     m_origin[2] = z0;
188     m_length[0] = l0;
189     m_length[1] = l1;
190     m_length[2] = l2;
191     m_NX[0] = d0;
192     m_NX[1] = d1;
193     m_NX[2] = d2;
194    
195 caltinay 3753 // local number of elements (including overlap)
196 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
197     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
198     m_NE[0]++;
199     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
200     m_ownNE[0]--;
201 caltinay 3764
202 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
203     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
204     m_NE[1]++;
205     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
206     m_ownNE[1]--;
207 caltinay 3764
208 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
209     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
210     m_NE[2]++;
211     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
212     m_ownNE[2]--;
213 caltinay 3753
214     // local number of nodes
215 caltinay 4334 m_NN[0] = m_NE[0]+1;
216     m_NN[1] = m_NE[1]+1;
217     m_NN[2] = m_NE[2]+1;
218 caltinay 3753
219 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
220 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
221     if (m_offset[0] > 0)
222     m_offset[0]--;
223     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
224     if (m_offset[1] > 0)
225     m_offset[1]--;
226     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
227     if (m_offset[2] > 0)
228     m_offset[2]--;
229 caltinay 3753
230 caltinay 3691 populateSampleIds();
231 caltinay 3756 createPattern();
232 sshaw 4622
233     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
234 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
235     i != tagnamestonums.end(); i++) {
236     setTagMap(i->first, i->second);
237     }
238 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
239 caltinay 3691 }
240    
241    
242     Brick::~Brick()
243     {
244 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
245     Paso_Connector_free(m_connector);
246 sshaw 4622 delete assembler;
247 caltinay 3691 }
248    
249     string Brick::getDescription() const
250     {
251     return "ripley::Brick";
252     }
253    
254     bool Brick::operator==(const AbstractDomain& other) const
255     {
256 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
257     if (o) {
258     return (RipleyDomain::operator==(other) &&
259 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
260     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
261     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
262     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
263 caltinay 3744 }
264 caltinay 3691
265     return false;
266     }
267    
268 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
269 caltinay 4618 const ReaderParameters& params) const
270 caltinay 4013 {
271     #ifdef USE_NETCDF
272     // check destination function space
273     int myN0, myN1, myN2;
274     if (out.getFunctionSpace().getTypeCode() == Nodes) {
275 caltinay 4334 myN0 = m_NN[0];
276     myN1 = m_NN[1];
277     myN2 = m_NN[2];
278 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
279     out.getFunctionSpace().getTypeCode() == ReducedElements) {
280 caltinay 4334 myN0 = m_NE[0];
281     myN1 = m_NE[1];
282     myN2 = m_NE[2];
283 caltinay 4013 } else
284     throw RipleyException("readNcGrid(): invalid function space for output data object");
285    
286 caltinay 4615 if (params.first.size() != 3)
287 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
288    
289 caltinay 4615 if (params.numValues.size() != 3)
290 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
291    
292 caltinay 4615 if (params.multiplier.size() != 3)
293 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
294 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
295     if (params.multiplier[i]<1)
296 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
297    
298 caltinay 4013 // check file existence and size
299     NcFile f(filename.c_str(), NcFile::ReadOnly);
300     if (!f.is_valid())
301     throw RipleyException("readNcGrid(): cannot open file");
302    
303     NcVar* var = f.get_var(varname.c_str());
304     if (!var)
305     throw RipleyException("readNcGrid(): invalid variable name");
306    
307     // TODO: rank>0 data support
308     const int numComp = out.getDataPointSize();
309     if (numComp > 1)
310     throw RipleyException("readNcGrid(): only scalar data supported");
311    
312     const int dims = var->num_dims();
313 caltinay 4477 boost::scoped_array<long> edges(var->edges());
314 caltinay 4013
315     // is this a slice of the data object (dims!=3)?
316     // note the expected ordering of edges (as in numpy: z,y,x)
317 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
318     params.numValues[1] > edges[1] ||
319     params.numValues[0] > edges[2]))
320     || (dims==2 && params.numValues[2]>1)
321     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
322 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
323     }
324    
325     // check if this rank contributes anything
326 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
327     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
328     params.first[1] >= m_offset[1]+myN1 ||
329     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
330     params.first[2] >= m_offset[2]+myN2 ||
331     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
332 caltinay 4013 return;
333     }
334    
335     // now determine how much this rank has to write
336    
337     // first coordinates in data object to write to
338 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
339     const int first1 = max(0, params.first[1]-m_offset[1]);
340     const int first2 = max(0, params.first[2]-m_offset[2]);
341 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
342     int idx0 = max(0, m_offset[0]-params.first[0]);
343     int idx1 = max(0, m_offset[1]-params.first[1]);
344     int idx2 = max(0, m_offset[2]-params.first[2]);
345 caltinay 4277 // number of values to read
346 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
347     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
348     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
349 caltinay 4013
350 caltinay 4618 // make sure we read the right block if going backwards through file
351     if (params.reverse[0])
352     idx0 = edges[dims-1]-num0-idx0;
353     if (dims>1 && params.reverse[1])
354     idx1 = edges[dims-2]-num1-idx1;
355     if (dims>2 && params.reverse[2])
356     idx2 = edges[dims-3]-num2-idx2;
357    
358    
359 caltinay 4013 vector<double> values(num0*num1*num2);
360     if (dims==3) {
361     var->set_cur(idx2, idx1, idx0);
362     var->get(&values[0], num2, num1, num0);
363     } else if (dims==2) {
364     var->set_cur(idx1, idx0);
365     var->get(&values[0], num1, num0);
366     } else {
367     var->set_cur(idx0);
368     var->get(&values[0], num0);
369     }
370    
371     const int dpp = out.getNumDataPointsPerSample();
372     out.requireWrite();
373    
374 caltinay 4618 // helpers for reversing
375     const int x0 = (params.reverse[0] ? num0-1 : 0);
376     const int x_mult = (params.reverse[0] ? -1 : 1);
377     const int y0 = (params.reverse[1] ? num1-1 : 0);
378     const int y_mult = (params.reverse[1] ? -1 : 1);
379     const int z0 = (params.reverse[2] ? num2-1 : 0);
380     const int z_mult = (params.reverse[2] ? -1 : 1);
381    
382 caltinay 4013 for (index_t z=0; z<num2; z++) {
383     for (index_t y=0; y<num1; y++) {
384     #pragma omp parallel for
385     for (index_t x=0; x<num0; x++) {
386 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
387     +(first1+y*params.multiplier[1])*myN0
388     +(first2+z*params.multiplier[2])*myN0*myN1;
389 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
390     +(y0+y_mult*y)*num0
391     +(x0+x_mult*x);
392 caltinay 4174 if (!isnan(values[srcIndex])) {
393 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
394     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
395     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
396 caltinay 4277 const int dataIndex = baseIndex+m0
397     +m1*myN0
398     +m2*myN0*myN1;
399     double* dest = out.getSampleDataRW(dataIndex);
400     for (index_t q=0; q<dpp; q++) {
401     *dest++ = values[srcIndex];
402     }
403     }
404     }
405 caltinay 4174 }
406 caltinay 4013 }
407     }
408     }
409     }
410     #else
411     throw RipleyException("readNcGrid(): not compiled with netCDF support");
412     #endif
413     }
414    
415 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
416 caltinay 4618 const ReaderParameters& params) const
417 caltinay 4334 {
418 caltinay 4495 // the mapping is not universally correct but should work on our
419     // supported platforms
420 caltinay 4615 switch (params.dataType) {
421 caltinay 4495 case DATATYPE_INT32:
422 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
423 caltinay 4495 break;
424     case DATATYPE_FLOAT32:
425 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
426 caltinay 4495 break;
427     case DATATYPE_FLOAT64:
428 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
429 caltinay 4495 break;
430     default:
431     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
432     }
433     }
434    
435     template<typename ValueType>
436     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
437 caltinay 4618 const ReaderParameters& params) const
438 caltinay 4495 {
439 caltinay 4334 // check destination function space
440     int myN0, myN1, myN2;
441     if (out.getFunctionSpace().getTypeCode() == Nodes) {
442     myN0 = m_NN[0];
443     myN1 = m_NN[1];
444     myN2 = m_NN[2];
445     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
446     out.getFunctionSpace().getTypeCode() == ReducedElements) {
447     myN0 = m_NE[0];
448     myN1 = m_NE[1];
449     myN2 = m_NE[2];
450     } else
451     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
452    
453 caltinay 4615 if (params.first.size() != 3)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
455    
456 caltinay 4615 if (params.numValues.size() != 3)
457 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
458    
459 caltinay 4615 if (params.multiplier.size() != 3)
460 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
461 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
462     if (params.multiplier[i]<1)
463 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
464    
465     // check file existence and size
466     ifstream f(filename.c_str(), ifstream::binary);
467     if (f.fail()) {
468     throw RipleyException("readBinaryGrid(): cannot open file");
469     }
470     f.seekg(0, ios::end);
471     const int numComp = out.getDataPointSize();
472     const int filesize = f.tellg();
473 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
474 caltinay 4334 if (filesize < reqsize) {
475     f.close();
476     throw RipleyException("readBinaryGrid(): not enough data in file");
477     }
478    
479     // check if this rank contributes anything
480 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
481     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
482     params.first[1] >= m_offset[1]+myN1 ||
483     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
484     params.first[2] >= m_offset[2]+myN2 ||
485     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
486 caltinay 4334 f.close();
487     return;
488     }
489    
490     // now determine how much this rank has to write
491    
492     // first coordinates in data object to write to
493 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
494     const int first1 = max(0, params.first[1]-m_offset[1]);
495     const int first2 = max(0, params.first[2]-m_offset[2]);
496 caltinay 4334 // indices to first value in file
497 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
498     const int idx1 = max(0, m_offset[1]-params.first[1]);
499     const int idx2 = max(0, m_offset[2]-params.first[2]);
500 caltinay 4334 // number of values to read
501 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
502     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
503     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
504 caltinay 4334
505     out.requireWrite();
506 caltinay 4495 vector<ValueType> values(num0*numComp);
507 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
508    
509 caltinay 4529 for (int z=0; z<num2; z++) {
510     for (int y=0; y<num1; y++) {
511 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
512     +(idx2+z)*params.numValues[0]*params.numValues[1]);
513 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
514     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
515 caltinay 4334
516 caltinay 4529 for (int x=0; x<num0; x++) {
517 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
518     +(first1+y*params.multiplier[1])*myN0
519     +(first2+z*params.multiplier[2])*myN0*myN1;
520     for (int m2=0; m2<params.multiplier[2]; m2++) {
521     for (int m1=0; m1<params.multiplier[1]; m1++) {
522     for (int m0=0; m0<params.multiplier[0]; m0++) {
523 caltinay 4334 const int dataIndex = baseIndex+m0
524     +m1*myN0
525     +m2*myN0*myN1;
526     double* dest = out.getSampleDataRW(dataIndex);
527 caltinay 4529 for (int c=0; c<numComp; c++) {
528     ValueType val = values[x*numComp+c];
529    
530 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
531 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
532     // this will alter val!!
533     byte_swap32(cval);
534     }
535     if (!std::isnan(val)) {
536     for (int q=0; q<dpp; q++) {
537     *dest++ = static_cast<double>(val);
538 caltinay 4334 }
539     }
540     }
541     }
542     }
543     }
544     }
545     }
546     }
547    
548     f.close();
549     }
550    
551 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
552     int byteOrder, int dataType) const
553 caltinay 4334 {
554 caltinay 4357 // the mapping is not universally correct but should work on our
555     // supported platforms
556     switch (dataType) {
557     case DATATYPE_INT32:
558     writeBinaryGridImpl<int>(in, filename, byteOrder);
559     break;
560     case DATATYPE_FLOAT32:
561     writeBinaryGridImpl<float>(in, filename, byteOrder);
562     break;
563     case DATATYPE_FLOAT64:
564     writeBinaryGridImpl<double>(in, filename, byteOrder);
565     break;
566     default:
567     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
568     }
569     }
570    
571     template<typename ValueType>
572     void Brick::writeBinaryGridImpl(const escript::Data& in,
573     const string& filename, int byteOrder) const
574     {
575 caltinay 4334 // check function space and determine number of points
576     int myN0, myN1, myN2;
577     int totalN0, totalN1, totalN2;
578     if (in.getFunctionSpace().getTypeCode() == Nodes) {
579     myN0 = m_NN[0];
580     myN1 = m_NN[1];
581     myN2 = m_NN[2];
582     totalN0 = m_gNE[0]+1;
583     totalN1 = m_gNE[1]+1;
584     totalN2 = m_gNE[2]+1;
585     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
586     in.getFunctionSpace().getTypeCode() == ReducedElements) {
587     myN0 = m_NE[0];
588     myN1 = m_NE[1];
589     myN2 = m_NE[2];
590     totalN0 = m_gNE[0];
591     totalN1 = m_gNE[1];
592     totalN2 = m_gNE[2];
593     } else
594     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
595    
596     const int numComp = in.getDataPointSize();
597     const int dpp = in.getNumDataPointsPerSample();
598 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
599 caltinay 4334
600     if (numComp > 1 || dpp > 1)
601     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
602    
603     // from here on we know that each sample consists of one value
604 caltinay 4482 FileWriter fw;
605     fw.openFile(filename, fileSize);
606 caltinay 4334 MPIBarrier();
607    
608     for (index_t z=0; z<myN2; z++) {
609     for (index_t y=0; y<myN1; y++) {
610     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
611 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
612 caltinay 4334 ostringstream oss;
613    
614     for (index_t x=0; x<myN0; x++) {
615 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
616 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
617     if (byteOrder == BYTEORDER_NATIVE) {
618 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
619     } else {
620     char* value = reinterpret_cast<char*>(&fvalue);
621 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
622 caltinay 4334 }
623     }
624 caltinay 4482 fw.writeAt(oss, fileofs);
625 caltinay 4334 }
626     }
627 caltinay 4482 fw.close();
628 caltinay 4334 }
629    
630 caltinay 3691 void Brick::dump(const string& fileName) const
631     {
632     #if USE_SILO
633     string fn(fileName);
634     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
635     fn+=".silo";
636     }
637    
638     int driver=DB_HDF5;
639     string siloPath;
640     DBfile* dbfile = NULL;
641    
642     #ifdef ESYS_MPI
643     PMPIO_baton_t* baton = NULL;
644 gross 3793 const int NUM_SILO_FILES = 1;
645     const char* blockDirFmt = "/block%04d";
646 caltinay 3691 #endif
647    
648     if (m_mpiInfo->size > 1) {
649     #ifdef ESYS_MPI
650     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
651     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
652     PMPIO_DefaultClose, (void*)&driver);
653     // try the fallback driver in case of error
654     if (!baton && driver != DB_PDB) {
655     driver = DB_PDB;
656     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
657     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
658     PMPIO_DefaultClose, (void*)&driver);
659     }
660     if (baton) {
661     char str[64];
662     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
663     siloPath = str;
664     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
665     }
666     #endif
667     } else {
668     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669     getDescription().c_str(), driver);
670     // try the fallback driver in case of error
671     if (!dbfile && driver != DB_PDB) {
672     driver = DB_PDB;
673     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
674     getDescription().c_str(), driver);
675     }
676     }
677    
678     if (!dbfile)
679     throw RipleyException("dump: Could not create Silo file");
680    
681     /*
682     if (driver==DB_HDF5) {
683     // gzip level 1 already provides good compression with minimal
684     // performance penalty. Some tests showed that gzip levels >3 performed
685     // rather badly on escript data both in terms of time and space
686     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
687     }
688     */
689    
690 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
691     boost::scoped_ptr<double> y(new double[m_NN[1]]);
692     boost::scoped_ptr<double> z(new double[m_NN[2]]);
693 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
694     #pragma omp parallel
695     {
696     #pragma omp for
697 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
698     coords[0][i0]=getLocalCoordinate(i0, 0);
699 caltinay 3691 }
700     #pragma omp for
701 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
702     coords[1][i1]=getLocalCoordinate(i1, 1);
703 caltinay 3691 }
704     #pragma omp for
705 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
706     coords[2][i2]=getLocalCoordinate(i2, 2);
707 caltinay 3691 }
708     }
709 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
710    
711     // write mesh
712     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
713 caltinay 3691 DB_COLLINEAR, NULL);
714    
715 caltinay 4334 // write node ids
716     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
717 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
718 caltinay 3691
719 caltinay 3698 // write element ids
720 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
721 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
722 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
723 caltinay 3698
724     // rank 0 writes multimesh and multivar
725 caltinay 3691 if (m_mpiInfo->rank == 0) {
726     vector<string> tempstrings;
727     vector<char*> names;
728     for (dim_t i=0; i<m_mpiInfo->size; i++) {
729     stringstream path;
730     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
731     tempstrings.push_back(path.str());
732     names.push_back((char*)tempstrings.back().c_str());
733     }
734     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
735     DBSetDir(dbfile, "/");
736     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
737     &types[0], NULL);
738     tempstrings.clear();
739     names.clear();
740     for (dim_t i=0; i<m_mpiInfo->size; i++) {
741     stringstream path;
742     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
743     tempstrings.push_back(path.str());
744     names.push_back((char*)tempstrings.back().c_str());
745     }
746     types.assign(m_mpiInfo->size, DB_QUADVAR);
747     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
748     &types[0], NULL);
749 caltinay 3698 tempstrings.clear();
750     names.clear();
751     for (dim_t i=0; i<m_mpiInfo->size; i++) {
752     stringstream path;
753     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
754     tempstrings.push_back(path.str());
755     names.push_back((char*)tempstrings.back().c_str());
756     }
757     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
758     &types[0], NULL);
759 caltinay 3691 }
760    
761     if (m_mpiInfo->size > 1) {
762     #ifdef ESYS_MPI
763     PMPIO_HandOffBaton(baton, dbfile);
764     PMPIO_Finish(baton);
765     #endif
766     } else {
767     DBClose(dbfile);
768     }
769    
770     #else // USE_SILO
771 caltinay 3791 throw RipleyException("dump: no Silo support");
772 caltinay 3691 #endif
773     }
774    
775     const int* Brick::borrowSampleReferenceIDs(int fsType) const
776     {
777 caltinay 3697 switch (fsType) {
778     case Nodes:
779 caltinay 3748 case ReducedNodes: //FIXME: reduced
780 caltinay 3753 return &m_nodeId[0];
781 caltinay 3757 case DegreesOfFreedom:
782     case ReducedDegreesOfFreedom: //FIXME: reduced
783 caltinay 3753 return &m_dofId[0];
784 caltinay 3697 case Elements:
785 caltinay 3733 case ReducedElements:
786 caltinay 3697 return &m_elementId[0];
787 caltinay 3757 case FaceElements:
788 caltinay 3733 case ReducedFaceElements:
789 caltinay 3697 return &m_faceId[0];
790 sshaw 4660 case Points:
791     return &m_diracPointNodeIDs[0];
792 caltinay 3697 default:
793     break;
794     }
795 caltinay 3691
796 caltinay 3697 stringstream msg;
797 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
798 caltinay 3697 throw RipleyException(msg.str());
799 caltinay 3691 }
800    
801 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
802 caltinay 3691 {
803 caltinay 3759 if (getMPISize()==1)
804     return true;
805    
806 caltinay 3757 switch (fsType) {
807     case Nodes:
808     case ReducedNodes: //FIXME: reduced
809     return (m_dofMap[id] < getNumDOF());
810     case DegreesOfFreedom:
811     case ReducedDegreesOfFreedom:
812     return true;
813     case Elements:
814     case ReducedElements:
815     {
816     // check ownership of element's _last_ node
817 caltinay 4334 const index_t x=id%m_NE[0] + 1;
818     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
819     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
820     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
821 caltinay 3757 }
822     case FaceElements:
823     case ReducedFaceElements:
824 caltinay 3759 {
825     // check ownership of face element's last node
826     dim_t n=0;
827 caltinay 4334 for (size_t i=0; i<6; i++) {
828     n+=m_faceCount[i];
829 caltinay 3759 if (id<n) {
830 caltinay 4334 const index_t j=id-n+m_faceCount[i];
831 caltinay 3759 if (i>=4) { // front or back
832 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
833     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
834 caltinay 3759 } else if (i>=2) { // bottom or top
835 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
836     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
837 caltinay 3759 } else { // left or right
838 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
839     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
840 caltinay 3759 }
841     }
842     }
843     return false;
844     }
845 caltinay 3757 default:
846     break;
847 caltinay 3753 }
848 caltinay 3757
849     stringstream msg;
850 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
851 caltinay 3757 throw RipleyException(msg.str());
852 caltinay 3691 }
853    
854 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
855 caltinay 3703 {
856 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
857     out.requireWrite();
858     #pragma omp parallel
859     {
860     if (m_faceOffset[0] > -1) {
861     #pragma omp for nowait
862 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
863     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
864     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
865 caltinay 3764 // set vector at four quadrature points
866     *o++ = -1.; *o++ = 0.; *o++ = 0.;
867     *o++ = -1.; *o++ = 0.; *o++ = 0.;
868     *o++ = -1.; *o++ = 0.; *o++ = 0.;
869     *o++ = -1.; *o++ = 0.; *o = 0.;
870     }
871     }
872     }
873    
874     if (m_faceOffset[1] > -1) {
875     #pragma omp for nowait
876 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
877     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
878     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
879 caltinay 3764 // set vector at four quadrature points
880     *o++ = 1.; *o++ = 0.; *o++ = 0.;
881     *o++ = 1.; *o++ = 0.; *o++ = 0.;
882     *o++ = 1.; *o++ = 0.; *o++ = 0.;
883     *o++ = 1.; *o++ = 0.; *o = 0.;
884     }
885     }
886     }
887    
888     if (m_faceOffset[2] > -1) {
889     #pragma omp for nowait
890 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
891     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
892     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
893 caltinay 3764 // set vector at four quadrature points
894     *o++ = 0.; *o++ = -1.; *o++ = 0.;
895     *o++ = 0.; *o++ = -1.; *o++ = 0.;
896     *o++ = 0.; *o++ = -1.; *o++ = 0.;
897     *o++ = 0.; *o++ = -1.; *o = 0.;
898     }
899     }
900     }
901    
902     if (m_faceOffset[3] > -1) {
903     #pragma omp for nowait
904 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
905     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
906     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
907 caltinay 3764 // set vector at four quadrature points
908     *o++ = 0.; *o++ = 1.; *o++ = 0.;
909     *o++ = 0.; *o++ = 1.; *o++ = 0.;
910     *o++ = 0.; *o++ = 1.; *o++ = 0.;
911     *o++ = 0.; *o++ = 1.; *o = 0.;
912     }
913     }
914     }
915    
916     if (m_faceOffset[4] > -1) {
917     #pragma omp for nowait
918 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
919     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
920     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
921 caltinay 3764 // set vector at four quadrature points
922     *o++ = 0.; *o++ = 0.; *o++ = -1.;
923     *o++ = 0.; *o++ = 0.; *o++ = -1.;
924     *o++ = 0.; *o++ = 0.; *o++ = -1.;
925     *o++ = 0.; *o++ = 0.; *o = -1.;
926     }
927     }
928     }
929    
930     if (m_faceOffset[5] > -1) {
931     #pragma omp for nowait
932 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
933     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
934     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
935 caltinay 3764 // set vector at four quadrature points
936     *o++ = 0.; *o++ = 0.; *o++ = 1.;
937     *o++ = 0.; *o++ = 0.; *o++ = 1.;
938     *o++ = 0.; *o++ = 0.; *o++ = 1.;
939     *o++ = 0.; *o++ = 0.; *o = 1.;
940     }
941     }
942     }
943     } // end of parallel section
944     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
945     out.requireWrite();
946     #pragma omp parallel
947     {
948     if (m_faceOffset[0] > -1) {
949     #pragma omp for nowait
950 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
951     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
952     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
953 caltinay 3764 *o++ = -1.;
954     *o++ = 0.;
955     *o = 0.;
956     }
957     }
958     }
959    
960     if (m_faceOffset[1] > -1) {
961     #pragma omp for nowait
962 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
963     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
964     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
965 caltinay 3764 *o++ = 1.;
966     *o++ = 0.;
967     *o = 0.;
968     }
969     }
970     }
971    
972     if (m_faceOffset[2] > -1) {
973     #pragma omp for nowait
974 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
976     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
977 caltinay 3764 *o++ = 0.;
978     *o++ = -1.;
979     *o = 0.;
980     }
981     }
982     }
983    
984     if (m_faceOffset[3] > -1) {
985     #pragma omp for nowait
986 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
987     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
988     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
989 caltinay 3764 *o++ = 0.;
990     *o++ = 1.;
991     *o = 0.;
992     }
993     }
994     }
995    
996     if (m_faceOffset[4] > -1) {
997     #pragma omp for nowait
998 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
999     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1000     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1001 caltinay 3764 *o++ = 0.;
1002     *o++ = 0.;
1003     *o = -1.;
1004     }
1005     }
1006     }
1007    
1008     if (m_faceOffset[5] > -1) {
1009     #pragma omp for nowait
1010 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1011     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1012     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1013 caltinay 3764 *o++ = 0.;
1014     *o++ = 0.;
1015     *o = 1.;
1016     }
1017     }
1018     }
1019     } // end of parallel section
1020    
1021     } else {
1022     stringstream msg;
1023 caltinay 3791 msg << "setToNormal: invalid function space type "
1024     << out.getFunctionSpace().getTypeCode();
1025 caltinay 3764 throw RipleyException(msg.str());
1026     }
1027     }
1028    
1029     void Brick::setToSize(escript::Data& out) const
1030     {
1031     if (out.getFunctionSpace().getTypeCode() == Elements
1032     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1033     out.requireWrite();
1034     const dim_t numQuad=out.getNumDataPointsPerSample();
1035 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1036 caltinay 3764 #pragma omp parallel for
1037     for (index_t k = 0; k < getNumElements(); ++k) {
1038     double* o = out.getSampleDataRW(k);
1039     fill(o, o+numQuad, size);
1040     }
1041     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1042     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1043     out.requireWrite();
1044     const dim_t numQuad=out.getNumDataPointsPerSample();
1045     #pragma omp parallel
1046     {
1047     if (m_faceOffset[0] > -1) {
1048 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1049 caltinay 3764 #pragma omp for nowait
1050 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1051     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1052     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1053 caltinay 3764 fill(o, o+numQuad, size);
1054     }
1055     }
1056     }
1057    
1058     if (m_faceOffset[1] > -1) {
1059 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1060 caltinay 3764 #pragma omp for nowait
1061 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1062     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1063     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1064 caltinay 3764 fill(o, o+numQuad, size);
1065     }
1066     }
1067     }
1068    
1069     if (m_faceOffset[2] > -1) {
1070 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1071 caltinay 3764 #pragma omp for nowait
1072 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1073     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1074     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1075 caltinay 3764 fill(o, o+numQuad, size);
1076     }
1077     }
1078     }
1079    
1080     if (m_faceOffset[3] > -1) {
1081 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1082 caltinay 3764 #pragma omp for nowait
1083 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1084     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1085     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1086 caltinay 3764 fill(o, o+numQuad, size);
1087     }
1088     }
1089     }
1090    
1091     if (m_faceOffset[4] > -1) {
1092 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1093 caltinay 3764 #pragma omp for nowait
1094 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1095     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1096     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1097 caltinay 3764 fill(o, o+numQuad, size);
1098     }
1099     }
1100     }
1101    
1102     if (m_faceOffset[5] > -1) {
1103 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1104 caltinay 3764 #pragma omp for nowait
1105 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1106     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1107     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1108 caltinay 3764 fill(o, o+numQuad, size);
1109     }
1110     }
1111     }
1112     } // end of parallel section
1113    
1114     } else {
1115     stringstream msg;
1116 caltinay 3791 msg << "setToSize: invalid function space type "
1117     << out.getFunctionSpace().getTypeCode();
1118 caltinay 3764 throw RipleyException(msg.str());
1119     }
1120     }
1121    
1122     void Brick::Print_Mesh_Info(const bool full) const
1123     {
1124     RipleyDomain::Print_Mesh_Info(full);
1125     if (full) {
1126     cout << " Id Coordinates" << endl;
1127     cout.precision(15);
1128     cout.setf(ios::scientific, ios::floatfield);
1129     for (index_t i=0; i < getNumNodes(); i++) {
1130     cout << " " << setw(5) << m_nodeId[i]
1131 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1132     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1133     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1134 caltinay 3764 }
1135     }
1136     }
1137    
1138    
1139     //protected
1140     void Brick::assembleCoordinates(escript::Data& arg) const
1141     {
1142     escriptDataC x = arg.getDataC();
1143     int numDim = m_numDim;
1144     if (!isDataPointShapeEqual(&x, 1, &numDim))
1145     throw RipleyException("setToX: Invalid Data object shape");
1146     if (!numSamplesEqual(&x, 1, getNumNodes()))
1147     throw RipleyException("setToX: Illegal number of samples in Data object");
1148    
1149     arg.requireWrite();
1150     #pragma omp parallel for
1151 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1152     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1153     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1154     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1155     point[0] = getLocalCoordinate(i0, 0);
1156     point[1] = getLocalCoordinate(i1, 1);
1157     point[2] = getLocalCoordinate(i2, 2);
1158 caltinay 3764 }
1159     }
1160     }
1161     }
1162    
1163     //protected
1164 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1165 caltinay 3764 {
1166 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1167 caltinay 3731 const double C0 = .044658198738520451079;
1168     const double C1 = .16666666666666666667;
1169     const double C2 = .21132486540518711775;
1170     const double C3 = .25;
1171     const double C4 = .5;
1172     const double C5 = .62200846792814621559;
1173     const double C6 = .78867513459481288225;
1174    
1175 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1176 caltinay 3760 out.requireWrite();
1177 caltinay 3913 #pragma omp parallel
1178     {
1179     vector<double> f_000(numComp);
1180     vector<double> f_001(numComp);
1181     vector<double> f_010(numComp);
1182     vector<double> f_011(numComp);
1183     vector<double> f_100(numComp);
1184     vector<double> f_101(numComp);
1185     vector<double> f_110(numComp);
1186     vector<double> f_111(numComp);
1187     #pragma omp for
1188 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1189     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1190     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1191     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1197     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1198     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1199     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1200 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1201 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1202     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1203     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1204     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1205     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1206     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1207     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1208     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1209     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1210     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1211     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1212     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1213 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1214     o[INDEX3(i,1,0,numComp,3)] = V4;
1215     o[INDEX3(i,2,0,numComp,3)] = V8;
1216     o[INDEX3(i,0,1,numComp,3)] = V0;
1217     o[INDEX3(i,1,1,numComp,3)] = V5;
1218     o[INDEX3(i,2,1,numComp,3)] = V9;
1219     o[INDEX3(i,0,2,numComp,3)] = V1;
1220     o[INDEX3(i,1,2,numComp,3)] = V4;
1221     o[INDEX3(i,2,2,numComp,3)] = V10;
1222     o[INDEX3(i,0,3,numComp,3)] = V1;
1223     o[INDEX3(i,1,3,numComp,3)] = V5;
1224     o[INDEX3(i,2,3,numComp,3)] = V11;
1225     o[INDEX3(i,0,4,numComp,3)] = V2;
1226     o[INDEX3(i,1,4,numComp,3)] = V6;
1227     o[INDEX3(i,2,4,numComp,3)] = V8;
1228     o[INDEX3(i,0,5,numComp,3)] = V2;
1229     o[INDEX3(i,1,5,numComp,3)] = V7;
1230     o[INDEX3(i,2,5,numComp,3)] = V9;
1231     o[INDEX3(i,0,6,numComp,3)] = V3;
1232     o[INDEX3(i,1,6,numComp,3)] = V6;
1233     o[INDEX3(i,2,6,numComp,3)] = V10;
1234     o[INDEX3(i,0,7,numComp,3)] = V3;
1235     o[INDEX3(i,1,7,numComp,3)] = V7;
1236     o[INDEX3(i,2,7,numComp,3)] = V11;
1237     } // end of component loop i
1238     } // end of k0 loop
1239     } // end of k1 loop
1240     } // end of k2 loop
1241     } // end of parallel section
1242 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1243 caltinay 3760 out.requireWrite();
1244 caltinay 3913 #pragma omp parallel
1245     {
1246     vector<double> f_000(numComp);
1247     vector<double> f_001(numComp);
1248     vector<double> f_010(numComp);
1249     vector<double> f_011(numComp);
1250     vector<double> f_100(numComp);
1251     vector<double> f_101(numComp);
1252     vector<double> f_110(numComp);
1253     vector<double> f_111(numComp);
1254     #pragma omp for
1255 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1256     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1257     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1258     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1264     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1265     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1266     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1267 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1268 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1269     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1270     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1271 caltinay 3913 } // end of component loop i
1272     } // end of k0 loop
1273     } // end of k1 loop
1274     } // end of k2 loop
1275     } // end of parallel section
1276 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1277 caltinay 3760 out.requireWrite();
1278 caltinay 3722 #pragma omp parallel
1279     {
1280 caltinay 3913 vector<double> f_000(numComp);
1281     vector<double> f_001(numComp);
1282     vector<double> f_010(numComp);
1283     vector<double> f_011(numComp);
1284     vector<double> f_100(numComp);
1285     vector<double> f_101(numComp);
1286     vector<double> f_110(numComp);
1287     vector<double> f_111(numComp);
1288 caltinay 3722 if (m_faceOffset[0] > -1) {
1289     #pragma omp for nowait
1290 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1291     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1292     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1301 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1302 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1303     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1304     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1305     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1306     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1308     o[INDEX3(i,2,0,numComp,3)] = V2;
1309 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1310 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1311     o[INDEX3(i,2,1,numComp,3)] = V3;
1312 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1313 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1314     o[INDEX3(i,2,2,numComp,3)] = V2;
1315 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1316 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1317     o[INDEX3(i,2,3,numComp,3)] = V3;
1318 caltinay 3764 } // end of component loop i
1319     } // end of k1 loop
1320     } // end of k2 loop
1321     } // end of face 0
1322 caltinay 3722 if (m_faceOffset[1] > -1) {
1323     #pragma omp for nowait
1324 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1325     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1326     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1332     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1333     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1334     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1335 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1336 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1337     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1338     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1339     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1340     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1341 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1342     o[INDEX3(i,2,0,numComp,3)] = V2;
1343 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1344 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1345     o[INDEX3(i,2,1,numComp,3)] = V3;
1346 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1347 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1348     o[INDEX3(i,2,2,numComp,3)] = V2;
1349 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1350 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1351     o[INDEX3(i,2,3,numComp,3)] = V3;
1352 caltinay 3764 } // end of component loop i
1353     } // end of k1 loop
1354     } // end of k2 loop
1355     } // end of face 1
1356 caltinay 3722 if (m_faceOffset[2] > -1) {
1357     #pragma omp for nowait
1358 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1359     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1360     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1369 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1370 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1371     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1372     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1373 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1374 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1375 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1376     o[INDEX3(i,0,1,numComp,3)] = V0;
1377 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1378 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1379     o[INDEX3(i,0,2,numComp,3)] = V0;
1380 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1381 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1382     o[INDEX3(i,0,3,numComp,3)] = V0;
1383 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1384 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1385 caltinay 3764 } // end of component loop i
1386     } // end of k0 loop
1387     } // end of k2 loop
1388     } // end of face 2
1389 caltinay 3722 if (m_faceOffset[3] > -1) {
1390     #pragma omp for nowait
1391 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1392     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1393     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1402 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1403 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1404     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1405     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1406     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1407 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1408 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1409 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1410     o[INDEX3(i,0,1,numComp,3)] = V0;
1411 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1412 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1413     o[INDEX3(i,0,2,numComp,3)] = V1;
1414 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1415 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1416     o[INDEX3(i,0,3,numComp,3)] = V1;
1417 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1418 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1419 caltinay 3764 } // end of component loop i
1420     } // end of k0 loop
1421     } // end of k2 loop
1422     } // end of face 3
1423 caltinay 3722 if (m_faceOffset[4] > -1) {
1424     #pragma omp for nowait
1425 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1426     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1427     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1436 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1437 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1438     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1439     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1440     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1441 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1442     o[INDEX3(i,1,0,numComp,3)] = V2;
1443 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1444 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1445     o[INDEX3(i,1,1,numComp,3)] = V3;
1446 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1447 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1448     o[INDEX3(i,1,2,numComp,3)] = V2;
1449 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1450 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1451     o[INDEX3(i,1,3,numComp,3)] = V3;
1452 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1453 caltinay 3764 } // end of component loop i
1454     } // end of k0 loop
1455     } // end of k1 loop
1456     } // end of face 4
1457 caltinay 3722 if (m_faceOffset[5] > -1) {
1458     #pragma omp for nowait
1459 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1460     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1461     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1470 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1471 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1472     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1473     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1474     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1475 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1476     o[INDEX3(i,1,0,numComp,3)] = V2;
1477 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1478 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1479     o[INDEX3(i,1,1,numComp,3)] = V3;
1480 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1481 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1482     o[INDEX3(i,1,2,numComp,3)] = V2;
1483 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1484 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1485     o[INDEX3(i,1,3,numComp,3)] = V3;
1486 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1487 caltinay 3764 } // end of component loop i
1488     } // end of k0 loop
1489     } // end of k1 loop
1490     } // end of face 5
1491 caltinay 3722 } // end of parallel section
1492 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1493 caltinay 3760 out.requireWrite();
1494 caltinay 3722 #pragma omp parallel
1495     {
1496 caltinay 3913 vector<double> f_000(numComp);
1497     vector<double> f_001(numComp);
1498     vector<double> f_010(numComp);
1499     vector<double> f_011(numComp);
1500     vector<double> f_100(numComp);
1501     vector<double> f_101(numComp);
1502     vector<double> f_110(numComp);
1503     vector<double> f_111(numComp);
1504 caltinay 3722 if (m_faceOffset[0] > -1) {
1505     #pragma omp for nowait
1506 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1507     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1508     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1517 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1518 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1519     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1520     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1521 caltinay 3764 } // end of component loop i
1522     } // end of k1 loop
1523     } // end of k2 loop
1524     } // end of face 0
1525 caltinay 3722 if (m_faceOffset[1] > -1) {
1526     #pragma omp for nowait
1527 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1528     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1529     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1538 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1539 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1540     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1541     o[INDEX3(i,2,0,numComp,3)] = (