/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4657 - (hide annotations)
Thu Feb 6 06:12:20 2014 UTC (5 years, 2 months ago) by jfenwick
Original Path: trunk/ripley/src/Brick.cpp
File size: 154421 byte(s)
I changed some files.
Updated copyright notices, added GeoComp.



1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 caltinay 4477 #include <boost/scoped_array.hpp>
23    
24 caltinay 4013 #ifdef USE_NETCDF
25     #include <netcdfcpp.h>
26     #endif
27    
28 caltinay 3691 #if USE_SILO
29     #include <silo.h>
30     #ifdef ESYS_MPI
31     #include <pmpio.h>
32     #endif
33     #endif
34    
35     #include <iomanip>
36    
37 jfenwick 4650 #include "esysUtils/EsysRandom.h"
38     #include "blocktools.h"
39    
40    
41 caltinay 3691 using namespace std;
42 caltinay 4334 using esysUtils::FileWriter;
43 caltinay 3691
44     namespace ripley {
45    
46 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
47 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
48     const std::vector<double>& points, const std::vector<int>& tags,
49 jfenwick 4638 const simap_t& tagnamestonums) :
50 caltinay 4334 RipleyDomain(3)
51 caltinay 3691 {
52 caltinay 3943 // ignore subdivision parameters for serial run
53     if (m_mpiInfo->size == 1) {
54     d0=1;
55     d1=1;
56     d2=1;
57     }
58     bool warn=false;
59     // if number of subdivisions is non-positive, try to subdivide by the same
60     // ratio as the number of elements
61     if (d0<=0 && d1<=0 && d2<=0) {
62     warn=true;
63 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
64 caltinay 4010 d0=max(1, d0);
65     d1=max(1, (int)(d0*n1/(float)n0));
66 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
67     if (d0*d1*d2 != m_mpiInfo->size) {
68     // ratios not the same so leave "smallest" side undivided and try
69     // dividing 2 sides only
70     if (n0>=n1) {
71     if (n1>=n2) {
72     d0=d1=0;
73     d2=1;
74     } else {
75     d0=d2=0;
76     d1=1;
77     }
78     } else {
79     if (n0>=n2) {
80     d0=d1=0;
81     d2=1;
82     } else {
83     d0=1;
84     d1=d2=0;
85     }
86     }
87     }
88     }
89     if (d0<=0 && d1<=0) {
90     warn=true;
91 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
92 caltinay 3943 d1=m_mpiInfo->size/d0;
93     if (d0*d1*d2 != m_mpiInfo->size) {
94     // ratios not the same so subdivide side with more elements only
95     if (n0>n1) {
96 caltinay 3944 d0=0;
97 caltinay 3943 d1=1;
98     } else {
99     d0=1;
100 caltinay 3944 d1=0;
101 caltinay 3943 }
102     }
103     } else if (d0<=0 && d2<=0) {
104     warn=true;
105 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
106 caltinay 3943 d2=m_mpiInfo->size/d0;
107     if (d0*d1*d2 != m_mpiInfo->size) {
108     // ratios not the same so subdivide side with more elements only
109     if (n0>n2) {
110 caltinay 3944 d0=0;
111 caltinay 3943 d2=1;
112     } else {
113     d0=1;
114 caltinay 3944 d2=0;
115 caltinay 3943 }
116     }
117     } else if (d1<=0 && d2<=0) {
118     warn=true;
119 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
120 caltinay 3943 d2=m_mpiInfo->size/d1;
121     if (d0*d1*d2 != m_mpiInfo->size) {
122     // ratios not the same so subdivide side with more elements only
123     if (n1>n2) {
124 caltinay 3944 d1=0;
125 caltinay 3943 d2=1;
126     } else {
127     d1=1;
128 caltinay 3944 d2=0;
129 caltinay 3943 }
130     }
131     }
132     if (d0<=0) {
133     // d1,d2 are preset, determine d0
134     d0=m_mpiInfo->size/(d1*d2);
135     } else if (d1<=0) {
136     // d0,d2 are preset, determine d1
137     d1=m_mpiInfo->size/(d0*d2);
138     } else if (d2<=0) {
139     // d0,d1 are preset, determine d2
140     d2=m_mpiInfo->size/(d0*d1);
141     }
142    
143 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
144     // among number of ranks
145 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
146 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
147    
148 caltinay 3943 if (warn) {
149     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
150     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
151     }
152 caltinay 3691
153 caltinay 4334 double l0 = x1-x0;
154     double l1 = y1-y0;
155     double l2 = z1-z0;
156     m_dx[0] = l0/n0;
157     m_dx[1] = l1/n1;
158     m_dx[2] = l2/n2;
159    
160     if ((n0+1)%d0 > 0) {
161 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
162 caltinay 4334 l0=m_dx[0]*n0;
163 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
164 caltinay 4334 << n0 << ", l0=" << l0 << endl;
165 caltinay 3943 }
166 caltinay 4334 if ((n1+1)%d1 > 0) {
167 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
168 caltinay 4334 l1=m_dx[1]*n1;
169 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
170 caltinay 4334 << n1 << ", l1=" << l1 << endl;
171 caltinay 3943 }
172 caltinay 4334 if ((n2+1)%d2 > 0) {
173 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
174 caltinay 4334 l2=m_dx[2]*n2;
175 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
176 caltinay 4334 << n2 << ", l2=" << l2 << endl;
177 caltinay 3943 }
178    
179 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
180 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
181    
182 caltinay 4334 m_gNE[0] = n0;
183     m_gNE[1] = n1;
184     m_gNE[2] = n2;
185     m_origin[0] = x0;
186     m_origin[1] = y0;
187     m_origin[2] = z0;
188     m_length[0] = l0;
189     m_length[1] = l1;
190     m_length[2] = l2;
191     m_NX[0] = d0;
192     m_NX[1] = d1;
193     m_NX[2] = d2;
194    
195 caltinay 3753 // local number of elements (including overlap)
196 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
197     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
198     m_NE[0]++;
199     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
200     m_ownNE[0]--;
201 caltinay 3764
202 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
203     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
204     m_NE[1]++;
205     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
206     m_ownNE[1]--;
207 caltinay 3764
208 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
209     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
210     m_NE[2]++;
211     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
212     m_ownNE[2]--;
213 caltinay 3753
214     // local number of nodes
215 caltinay 4334 m_NN[0] = m_NE[0]+1;
216     m_NN[1] = m_NE[1]+1;
217     m_NN[2] = m_NE[2]+1;
218 caltinay 3753
219 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
220 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
221     if (m_offset[0] > 0)
222     m_offset[0]--;
223     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
224     if (m_offset[1] > 0)
225     m_offset[1]--;
226     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
227     if (m_offset[2] > 0)
228     m_offset[2]--;
229 caltinay 3753
230 caltinay 3691 populateSampleIds();
231 caltinay 3756 createPattern();
232 sshaw 4622
233     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
234 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
235     i != tagnamestonums.end(); i++) {
236     setTagMap(i->first, i->second);
237     }
238 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
239 caltinay 3691 }
240    
241    
242     Brick::~Brick()
243     {
244 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
245     Paso_Connector_free(m_connector);
246 sshaw 4622 delete assembler;
247 caltinay 3691 }
248    
249     string Brick::getDescription() const
250     {
251     return "ripley::Brick";
252     }
253    
254     bool Brick::operator==(const AbstractDomain& other) const
255     {
256 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
257     if (o) {
258     return (RipleyDomain::operator==(other) &&
259 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
260     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
261     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
262     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
263 caltinay 3744 }
264 caltinay 3691
265     return false;
266     }
267    
268 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
269 caltinay 4618 const ReaderParameters& params) const
270 caltinay 4013 {
271     #ifdef USE_NETCDF
272     // check destination function space
273     int myN0, myN1, myN2;
274     if (out.getFunctionSpace().getTypeCode() == Nodes) {
275 caltinay 4334 myN0 = m_NN[0];
276     myN1 = m_NN[1];
277     myN2 = m_NN[2];
278 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
279     out.getFunctionSpace().getTypeCode() == ReducedElements) {
280 caltinay 4334 myN0 = m_NE[0];
281     myN1 = m_NE[1];
282     myN2 = m_NE[2];
283 caltinay 4013 } else
284     throw RipleyException("readNcGrid(): invalid function space for output data object");
285    
286 caltinay 4615 if (params.first.size() != 3)
287 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
288    
289 caltinay 4615 if (params.numValues.size() != 3)
290 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
291    
292 caltinay 4615 if (params.multiplier.size() != 3)
293 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
294 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
295     if (params.multiplier[i]<1)
296 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
297    
298 caltinay 4013 // check file existence and size
299     NcFile f(filename.c_str(), NcFile::ReadOnly);
300     if (!f.is_valid())
301     throw RipleyException("readNcGrid(): cannot open file");
302    
303     NcVar* var = f.get_var(varname.c_str());
304     if (!var)
305     throw RipleyException("readNcGrid(): invalid variable name");
306    
307     // TODO: rank>0 data support
308     const int numComp = out.getDataPointSize();
309     if (numComp > 1)
310     throw RipleyException("readNcGrid(): only scalar data supported");
311    
312     const int dims = var->num_dims();
313 caltinay 4477 boost::scoped_array<long> edges(var->edges());
314 caltinay 4013
315     // is this a slice of the data object (dims!=3)?
316     // note the expected ordering of edges (as in numpy: z,y,x)
317 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
318     params.numValues[1] > edges[1] ||
319     params.numValues[0] > edges[2]))
320     || (dims==2 && params.numValues[2]>1)
321     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
322 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
323     }
324    
325     // check if this rank contributes anything
326 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
327     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
328     params.first[1] >= m_offset[1]+myN1 ||
329     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
330     params.first[2] >= m_offset[2]+myN2 ||
331     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
332 caltinay 4013 return;
333     }
334    
335     // now determine how much this rank has to write
336    
337     // first coordinates in data object to write to
338 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
339     const int first1 = max(0, params.first[1]-m_offset[1]);
340     const int first2 = max(0, params.first[2]-m_offset[2]);
341 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
342     int idx0 = max(0, m_offset[0]-params.first[0]);
343     int idx1 = max(0, m_offset[1]-params.first[1]);
344     int idx2 = max(0, m_offset[2]-params.first[2]);
345 caltinay 4277 // number of values to read
346 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
347     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
348     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
349 caltinay 4013
350 caltinay 4618 // make sure we read the right block if going backwards through file
351     if (params.reverse[0])
352     idx0 = edges[dims-1]-num0-idx0;
353     if (dims>1 && params.reverse[1])
354     idx1 = edges[dims-2]-num1-idx1;
355     if (dims>2 && params.reverse[2])
356     idx2 = edges[dims-3]-num2-idx2;
357    
358    
359 caltinay 4013 vector<double> values(num0*num1*num2);
360     if (dims==3) {
361     var->set_cur(idx2, idx1, idx0);
362     var->get(&values[0], num2, num1, num0);
363     } else if (dims==2) {
364     var->set_cur(idx1, idx0);
365     var->get(&values[0], num1, num0);
366     } else {
367     var->set_cur(idx0);
368     var->get(&values[0], num0);
369     }
370    
371     const int dpp = out.getNumDataPointsPerSample();
372     out.requireWrite();
373    
374 caltinay 4618 // helpers for reversing
375     const int x0 = (params.reverse[0] ? num0-1 : 0);
376     const int x_mult = (params.reverse[0] ? -1 : 1);
377     const int y0 = (params.reverse[1] ? num1-1 : 0);
378     const int y_mult = (params.reverse[1] ? -1 : 1);
379     const int z0 = (params.reverse[2] ? num2-1 : 0);
380     const int z_mult = (params.reverse[2] ? -1 : 1);
381    
382 caltinay 4013 for (index_t z=0; z<num2; z++) {
383     for (index_t y=0; y<num1; y++) {
384     #pragma omp parallel for
385     for (index_t x=0; x<num0; x++) {
386 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
387     +(first1+y*params.multiplier[1])*myN0
388     +(first2+z*params.multiplier[2])*myN0*myN1;
389 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
390     +(y0+y_mult*y)*num0
391     +(x0+x_mult*x);
392 caltinay 4174 if (!isnan(values[srcIndex])) {
393 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
394     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
395     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
396 caltinay 4277 const int dataIndex = baseIndex+m0
397     +m1*myN0
398     +m2*myN0*myN1;
399     double* dest = out.getSampleDataRW(dataIndex);
400     for (index_t q=0; q<dpp; q++) {
401     *dest++ = values[srcIndex];
402     }
403     }
404     }
405 caltinay 4174 }
406 caltinay 4013 }
407     }
408     }
409     }
410     #else
411     throw RipleyException("readNcGrid(): not compiled with netCDF support");
412     #endif
413     }
414    
415 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
416 caltinay 4618 const ReaderParameters& params) const
417 caltinay 4334 {
418 caltinay 4495 // the mapping is not universally correct but should work on our
419     // supported platforms
420 caltinay 4615 switch (params.dataType) {
421 caltinay 4495 case DATATYPE_INT32:
422 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
423 caltinay 4495 break;
424     case DATATYPE_FLOAT32:
425 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
426 caltinay 4495 break;
427     case DATATYPE_FLOAT64:
428 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
429 caltinay 4495 break;
430     default:
431     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
432     }
433     }
434    
435     template<typename ValueType>
436     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
437 caltinay 4618 const ReaderParameters& params) const
438 caltinay 4495 {
439 caltinay 4334 // check destination function space
440     int myN0, myN1, myN2;
441     if (out.getFunctionSpace().getTypeCode() == Nodes) {
442     myN0 = m_NN[0];
443     myN1 = m_NN[1];
444     myN2 = m_NN[2];
445     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
446     out.getFunctionSpace().getTypeCode() == ReducedElements) {
447     myN0 = m_NE[0];
448     myN1 = m_NE[1];
449     myN2 = m_NE[2];
450     } else
451     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
452    
453 caltinay 4615 if (params.first.size() != 3)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
455    
456 caltinay 4615 if (params.numValues.size() != 3)
457 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
458    
459 caltinay 4615 if (params.multiplier.size() != 3)
460 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
461 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
462     if (params.multiplier[i]<1)
463 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
464    
465     // check file existence and size
466     ifstream f(filename.c_str(), ifstream::binary);
467     if (f.fail()) {
468     throw RipleyException("readBinaryGrid(): cannot open file");
469     }
470     f.seekg(0, ios::end);
471     const int numComp = out.getDataPointSize();
472     const int filesize = f.tellg();
473 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
474 caltinay 4334 if (filesize < reqsize) {
475     f.close();
476     throw RipleyException("readBinaryGrid(): not enough data in file");
477     }
478    
479     // check if this rank contributes anything
480 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
481     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
482     params.first[1] >= m_offset[1]+myN1 ||
483     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
484     params.first[2] >= m_offset[2]+myN2 ||
485     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
486 caltinay 4334 f.close();
487     return;
488     }
489    
490     // now determine how much this rank has to write
491    
492     // first coordinates in data object to write to
493 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
494     const int first1 = max(0, params.first[1]-m_offset[1]);
495     const int first2 = max(0, params.first[2]-m_offset[2]);
496 caltinay 4334 // indices to first value in file
497 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
498     const int idx1 = max(0, m_offset[1]-params.first[1]);
499     const int idx2 = max(0, m_offset[2]-params.first[2]);
500 caltinay 4334 // number of values to read
501 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
502     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
503     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
504 caltinay 4334
505     out.requireWrite();
506 caltinay 4495 vector<ValueType> values(num0*numComp);
507 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
508    
509 caltinay 4529 for (int z=0; z<num2; z++) {
510     for (int y=0; y<num1; y++) {
511 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
512     +(idx2+z)*params.numValues[0]*params.numValues[1]);
513 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
514     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
515 caltinay 4334
516 caltinay 4529 for (int x=0; x<num0; x++) {
517 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
518     +(first1+y*params.multiplier[1])*myN0
519     +(first2+z*params.multiplier[2])*myN0*myN1;
520     for (int m2=0; m2<params.multiplier[2]; m2++) {
521     for (int m1=0; m1<params.multiplier[1]; m1++) {
522     for (int m0=0; m0<params.multiplier[0]; m0++) {
523 caltinay 4334 const int dataIndex = baseIndex+m0
524     +m1*myN0
525     +m2*myN0*myN1;
526     double* dest = out.getSampleDataRW(dataIndex);
527 caltinay 4529 for (int c=0; c<numComp; c++) {
528     ValueType val = values[x*numComp+c];
529    
530 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
531 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
532     // this will alter val!!
533     byte_swap32(cval);
534     }
535     if (!std::isnan(val)) {
536     for (int q=0; q<dpp; q++) {
537     *dest++ = static_cast<double>(val);
538 caltinay 4334 }
539     }
540     }
541     }
542     }
543     }
544     }
545     }
546     }
547    
548     f.close();
549     }
550    
551 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
552     int byteOrder, int dataType) const
553 caltinay 4334 {
554 caltinay 4357 // the mapping is not universally correct but should work on our
555     // supported platforms
556     switch (dataType) {
557     case DATATYPE_INT32:
558     writeBinaryGridImpl<int>(in, filename, byteOrder);
559     break;
560     case DATATYPE_FLOAT32:
561     writeBinaryGridImpl<float>(in, filename, byteOrder);
562     break;
563     case DATATYPE_FLOAT64:
564     writeBinaryGridImpl<double>(in, filename, byteOrder);
565     break;
566     default:
567     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
568     }
569     }
570    
571     template<typename ValueType>
572     void Brick::writeBinaryGridImpl(const escript::Data& in,
573     const string& filename, int byteOrder) const
574     {
575 caltinay 4334 // check function space and determine number of points
576     int myN0, myN1, myN2;
577     int totalN0, totalN1, totalN2;
578     if (in.getFunctionSpace().getTypeCode() == Nodes) {
579     myN0 = m_NN[0];
580     myN1 = m_NN[1];
581     myN2 = m_NN[2];
582     totalN0 = m_gNE[0]+1;
583     totalN1 = m_gNE[1]+1;
584     totalN2 = m_gNE[2]+1;
585     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
586     in.getFunctionSpace().getTypeCode() == ReducedElements) {
587     myN0 = m_NE[0];
588     myN1 = m_NE[1];
589     myN2 = m_NE[2];
590     totalN0 = m_gNE[0];
591     totalN1 = m_gNE[1];
592     totalN2 = m_gNE[2];
593     } else
594     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
595    
596     const int numComp = in.getDataPointSize();
597     const int dpp = in.getNumDataPointsPerSample();
598 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
599 caltinay 4334
600     if (numComp > 1 || dpp > 1)
601     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
602    
603     // from here on we know that each sample consists of one value
604 caltinay 4482 FileWriter fw;
605     fw.openFile(filename, fileSize);
606 caltinay 4334 MPIBarrier();
607    
608     for (index_t z=0; z<myN2; z++) {
609     for (index_t y=0; y<myN1; y++) {
610     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
611 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
612 caltinay 4334 ostringstream oss;
613    
614     for (index_t x=0; x<myN0; x++) {
615 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
616 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
617     if (byteOrder == BYTEORDER_NATIVE) {
618 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
619     } else {
620     char* value = reinterpret_cast<char*>(&fvalue);
621 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
622 caltinay 4334 }
623     }
624 caltinay 4482 fw.writeAt(oss, fileofs);
625 caltinay 4334 }
626     }
627 caltinay 4482 fw.close();
628 caltinay 4334 }
629    
630 caltinay 3691 void Brick::dump(const string& fileName) const
631     {
632     #if USE_SILO
633     string fn(fileName);
634     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
635     fn+=".silo";
636     }
637    
638     int driver=DB_HDF5;
639     string siloPath;
640     DBfile* dbfile = NULL;
641    
642     #ifdef ESYS_MPI
643     PMPIO_baton_t* baton = NULL;
644 gross 3793 const int NUM_SILO_FILES = 1;
645     const char* blockDirFmt = "/block%04d";
646 caltinay 3691 #endif
647    
648     if (m_mpiInfo->size > 1) {
649     #ifdef ESYS_MPI
650     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
651     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
652     PMPIO_DefaultClose, (void*)&driver);
653     // try the fallback driver in case of error
654     if (!baton && driver != DB_PDB) {
655     driver = DB_PDB;
656     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
657     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
658     PMPIO_DefaultClose, (void*)&driver);
659     }
660     if (baton) {
661     char str[64];
662     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
663     siloPath = str;
664     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
665     }
666     #endif
667     } else {
668     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669     getDescription().c_str(), driver);
670     // try the fallback driver in case of error
671     if (!dbfile && driver != DB_PDB) {
672     driver = DB_PDB;
673     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
674     getDescription().c_str(), driver);
675     }
676     }
677    
678     if (!dbfile)
679     throw RipleyException("dump: Could not create Silo file");
680    
681     /*
682     if (driver==DB_HDF5) {
683     // gzip level 1 already provides good compression with minimal
684     // performance penalty. Some tests showed that gzip levels >3 performed
685     // rather badly on escript data both in terms of time and space
686     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
687     }
688     */
689    
690 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
691     boost::scoped_ptr<double> y(new double[m_NN[1]]);
692     boost::scoped_ptr<double> z(new double[m_NN[2]]);
693 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
694     #pragma omp parallel
695     {
696     #pragma omp for
697 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
698     coords[0][i0]=getLocalCoordinate(i0, 0);
699 caltinay 3691 }
700     #pragma omp for
701 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
702     coords[1][i1]=getLocalCoordinate(i1, 1);
703 caltinay 3691 }
704     #pragma omp for
705 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
706     coords[2][i2]=getLocalCoordinate(i2, 2);
707 caltinay 3691 }
708     }
709 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
710    
711     // write mesh
712     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
713 caltinay 3691 DB_COLLINEAR, NULL);
714    
715 caltinay 4334 // write node ids
716     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
717 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
718 caltinay 3691
719 caltinay 3698 // write element ids
720 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
721 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
722 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
723 caltinay 3698
724     // rank 0 writes multimesh and multivar
725 caltinay 3691 if (m_mpiInfo->rank == 0) {
726     vector<string> tempstrings;
727     vector<char*> names;
728     for (dim_t i=0; i<m_mpiInfo->size; i++) {
729     stringstream path;
730     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
731     tempstrings.push_back(path.str());
732     names.push_back((char*)tempstrings.back().c_str());
733     }
734     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
735     DBSetDir(dbfile, "/");
736     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
737     &types[0], NULL);
738     tempstrings.clear();
739     names.clear();
740     for (dim_t i=0; i<m_mpiInfo->size; i++) {
741     stringstream path;
742     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
743     tempstrings.push_back(path.str());
744     names.push_back((char*)tempstrings.back().c_str());
745     }
746     types.assign(m_mpiInfo->size, DB_QUADVAR);
747     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
748     &types[0], NULL);
749 caltinay 3698 tempstrings.clear();
750     names.clear();
751     for (dim_t i=0; i<m_mpiInfo->size; i++) {
752     stringstream path;
753     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
754     tempstrings.push_back(path.str());
755     names.push_back((char*)tempstrings.back().c_str());
756     }
757     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
758     &types[0], NULL);
759 caltinay 3691 }
760    
761     if (m_mpiInfo->size > 1) {
762     #ifdef ESYS_MPI
763     PMPIO_HandOffBaton(baton, dbfile);
764     PMPIO_Finish(baton);
765     #endif
766     } else {
767     DBClose(dbfile);
768     }
769    
770     #else // USE_SILO
771 caltinay 3791 throw RipleyException("dump: no Silo support");
772 caltinay 3691 #endif
773     }
774    
775     const int* Brick::borrowSampleReferenceIDs(int fsType) const
776     {
777 caltinay 3697 switch (fsType) {
778     case Nodes:
779 caltinay 3748 case ReducedNodes: //FIXME: reduced
780 caltinay 3753 return &m_nodeId[0];
781 caltinay 3757 case DegreesOfFreedom:
782     case ReducedDegreesOfFreedom: //FIXME: reduced
783 caltinay 3753 return &m_dofId[0];
784 caltinay 3697 case Elements:
785 caltinay 3733 case ReducedElements:
786 caltinay 3697 return &m_elementId[0];
787 caltinay 3757 case FaceElements:
788 caltinay 3733 case ReducedFaceElements:
789 caltinay 3697 return &m_faceId[0];
790     default:
791     break;
792     }
793 caltinay 3691
794 caltinay 3697 stringstream msg;
795 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
796 caltinay 3697 throw RipleyException(msg.str());
797 caltinay 3691 }
798    
799 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
800 caltinay 3691 {
801 caltinay 3759 if (getMPISize()==1)
802     return true;
803    
804 caltinay 3757 switch (fsType) {
805     case Nodes:
806     case ReducedNodes: //FIXME: reduced
807     return (m_dofMap[id] < getNumDOF());
808     case DegreesOfFreedom:
809     case ReducedDegreesOfFreedom:
810     return true;
811     case Elements:
812     case ReducedElements:
813     {
814     // check ownership of element's _last_ node
815 caltinay 4334 const index_t x=id%m_NE[0] + 1;
816     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
817     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
818     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
819 caltinay 3757 }
820     case FaceElements:
821     case ReducedFaceElements:
822 caltinay 3759 {
823     // check ownership of face element's last node
824     dim_t n=0;
825 caltinay 4334 for (size_t i=0; i<6; i++) {
826     n+=m_faceCount[i];
827 caltinay 3759 if (id<n) {
828 caltinay 4334 const index_t j=id-n+m_faceCount[i];
829 caltinay 3759 if (i>=4) { // front or back
830 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
831     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
832 caltinay 3759 } else if (i>=2) { // bottom or top
833 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
834     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
835 caltinay 3759 } else { // left or right
836 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
837     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
838 caltinay 3759 }
839     }
840     }
841     return false;
842     }
843 caltinay 3757 default:
844     break;
845 caltinay 3753 }
846 caltinay 3757
847     stringstream msg;
848 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
849 caltinay 3757 throw RipleyException(msg.str());
850 caltinay 3691 }
851    
852 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
853 caltinay 3703 {
854 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
855     out.requireWrite();
856     #pragma omp parallel
857     {
858     if (m_faceOffset[0] > -1) {
859     #pragma omp for nowait
860 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
861     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
862     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
863 caltinay 3764 // set vector at four quadrature points
864     *o++ = -1.; *o++ = 0.; *o++ = 0.;
865     *o++ = -1.; *o++ = 0.; *o++ = 0.;
866     *o++ = -1.; *o++ = 0.; *o++ = 0.;
867     *o++ = -1.; *o++ = 0.; *o = 0.;
868     }
869     }
870     }
871    
872     if (m_faceOffset[1] > -1) {
873     #pragma omp for nowait
874 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
875     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
876     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
877 caltinay 3764 // set vector at four quadrature points
878     *o++ = 1.; *o++ = 0.; *o++ = 0.;
879     *o++ = 1.; *o++ = 0.; *o++ = 0.;
880     *o++ = 1.; *o++ = 0.; *o++ = 0.;
881     *o++ = 1.; *o++ = 0.; *o = 0.;
882     }
883     }
884     }
885    
886     if (m_faceOffset[2] > -1) {
887     #pragma omp for nowait
888 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
889     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
890     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
891 caltinay 3764 // set vector at four quadrature points
892     *o++ = 0.; *o++ = -1.; *o++ = 0.;
893     *o++ = 0.; *o++ = -1.; *o++ = 0.;
894     *o++ = 0.; *o++ = -1.; *o++ = 0.;
895     *o++ = 0.; *o++ = -1.; *o = 0.;
896     }
897     }
898     }
899    
900     if (m_faceOffset[3] > -1) {
901     #pragma omp for nowait
902 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
903     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
904     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
905 caltinay 3764 // set vector at four quadrature points
906     *o++ = 0.; *o++ = 1.; *o++ = 0.;
907     *o++ = 0.; *o++ = 1.; *o++ = 0.;
908     *o++ = 0.; *o++ = 1.; *o++ = 0.;
909     *o++ = 0.; *o++ = 1.; *o = 0.;
910     }
911     }
912     }
913    
914     if (m_faceOffset[4] > -1) {
915     #pragma omp for nowait
916 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
917     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
918     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
919 caltinay 3764 // set vector at four quadrature points
920     *o++ = 0.; *o++ = 0.; *o++ = -1.;
921     *o++ = 0.; *o++ = 0.; *o++ = -1.;
922     *o++ = 0.; *o++ = 0.; *o++ = -1.;
923     *o++ = 0.; *o++ = 0.; *o = -1.;
924     }
925     }
926     }
927    
928     if (m_faceOffset[5] > -1) {
929     #pragma omp for nowait
930 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
931     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
932     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
933 caltinay 3764 // set vector at four quadrature points
934     *o++ = 0.; *o++ = 0.; *o++ = 1.;
935     *o++ = 0.; *o++ = 0.; *o++ = 1.;
936     *o++ = 0.; *o++ = 0.; *o++ = 1.;
937     *o++ = 0.; *o++ = 0.; *o = 1.;
938     }
939     }
940     }
941     } // end of parallel section
942     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
943     out.requireWrite();
944     #pragma omp parallel
945     {
946     if (m_faceOffset[0] > -1) {
947     #pragma omp for nowait
948 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
949     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
950     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
951 caltinay 3764 *o++ = -1.;
952     *o++ = 0.;
953     *o = 0.;
954     }
955     }
956     }
957    
958     if (m_faceOffset[1] > -1) {
959     #pragma omp for nowait
960 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
961     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
962     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
963 caltinay 3764 *o++ = 1.;
964     *o++ = 0.;
965     *o = 0.;
966     }
967     }
968     }
969    
970     if (m_faceOffset[2] > -1) {
971     #pragma omp for nowait
972 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
973     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
974     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
975 caltinay 3764 *o++ = 0.;
976     *o++ = -1.;
977     *o = 0.;
978     }
979     }
980     }
981    
982     if (m_faceOffset[3] > -1) {
983     #pragma omp for nowait
984 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
985     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
986     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
987 caltinay 3764 *o++ = 0.;
988     *o++ = 1.;
989     *o = 0.;
990     }
991     }
992     }
993    
994     if (m_faceOffset[4] > -1) {
995     #pragma omp for nowait
996 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
997     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
998     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
999 caltinay 3764 *o++ = 0.;
1000     *o++ = 0.;
1001     *o = -1.;
1002     }
1003     }
1004     }
1005    
1006     if (m_faceOffset[5] > -1) {
1007     #pragma omp for nowait
1008 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1009     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1010     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1011 caltinay 3764 *o++ = 0.;
1012     *o++ = 0.;
1013     *o = 1.;
1014     }
1015     }
1016     }
1017     } // end of parallel section
1018    
1019     } else {
1020     stringstream msg;
1021 caltinay 3791 msg << "setToNormal: invalid function space type "
1022     << out.getFunctionSpace().getTypeCode();
1023 caltinay 3764 throw RipleyException(msg.str());
1024     }
1025     }
1026    
1027     void Brick::setToSize(escript::Data& out) const
1028     {
1029     if (out.getFunctionSpace().getTypeCode() == Elements
1030     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1031     out.requireWrite();
1032     const dim_t numQuad=out.getNumDataPointsPerSample();
1033 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1034 caltinay 3764 #pragma omp parallel for
1035     for (index_t k = 0; k < getNumElements(); ++k) {
1036     double* o = out.getSampleDataRW(k);
1037     fill(o, o+numQuad, size);
1038     }
1039     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1040     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1041     out.requireWrite();
1042     const dim_t numQuad=out.getNumDataPointsPerSample();
1043     #pragma omp parallel
1044     {
1045     if (m_faceOffset[0] > -1) {
1046 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1047 caltinay 3764 #pragma omp for nowait
1048 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1049     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1050     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1051 caltinay 3764 fill(o, o+numQuad, size);
1052     }
1053     }
1054     }
1055    
1056     if (m_faceOffset[1] > -1) {
1057 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1058 caltinay 3764 #pragma omp for nowait
1059 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1060     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1061     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1062 caltinay 3764 fill(o, o+numQuad, size);
1063     }
1064     }
1065     }
1066    
1067     if (m_faceOffset[2] > -1) {
1068 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1069 caltinay 3764 #pragma omp for nowait
1070 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1071     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1072     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1073 caltinay 3764 fill(o, o+numQuad, size);
1074     }
1075     }
1076     }
1077    
1078     if (m_faceOffset[3] > -1) {
1079 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1080 caltinay 3764 #pragma omp for nowait
1081 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1082     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1083     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1084 caltinay 3764 fill(o, o+numQuad, size);
1085     }
1086     }
1087     }
1088    
1089     if (m_faceOffset[4] > -1) {
1090 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1091 caltinay 3764 #pragma omp for nowait
1092 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1093     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1094     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1095 caltinay 3764 fill(o, o+numQuad, size);
1096     }
1097     }
1098     }
1099    
1100     if (m_faceOffset[5] > -1) {
1101 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1102 caltinay 3764 #pragma omp for nowait
1103 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1104     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1105     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1106 caltinay 3764 fill(o, o+numQuad, size);
1107     }
1108     }
1109     }
1110     } // end of parallel section
1111    
1112     } else {
1113     stringstream msg;
1114 caltinay 3791 msg << "setToSize: invalid function space type "
1115     << out.getFunctionSpace().getTypeCode();
1116 caltinay 3764 throw RipleyException(msg.str());
1117     }
1118     }
1119    
1120     void Brick::Print_Mesh_Info(const bool full) const
1121     {
1122     RipleyDomain::Print_Mesh_Info(full);
1123     if (full) {
1124     cout << " Id Coordinates" << endl;
1125     cout.precision(15);
1126     cout.setf(ios::scientific, ios::floatfield);
1127     for (index_t i=0; i < getNumNodes(); i++) {
1128     cout << " " << setw(5) << m_nodeId[i]
1129 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1130     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1131     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1132 caltinay 3764 }
1133     }
1134     }
1135    
1136    
1137     //protected
1138     void Brick::assembleCoordinates(escript::Data& arg) const
1139     {
1140     escriptDataC x = arg.getDataC();
1141     int numDim = m_numDim;
1142     if (!isDataPointShapeEqual(&x, 1, &numDim))
1143     throw RipleyException("setToX: Invalid Data object shape");
1144     if (!numSamplesEqual(&x, 1, getNumNodes()))
1145     throw RipleyException("setToX: Illegal number of samples in Data object");
1146    
1147     arg.requireWrite();
1148     #pragma omp parallel for
1149 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1150     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1151     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1152     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1153     point[0] = getLocalCoordinate(i0, 0);
1154     point[1] = getLocalCoordinate(i1, 1);
1155     point[2] = getLocalCoordinate(i2, 2);
1156 caltinay 3764 }
1157     }
1158     }
1159     }
1160    
1161     //protected
1162 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1163 caltinay 3764 {
1164 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1165 caltinay 3731 const double C0 = .044658198738520451079;
1166     const double C1 = .16666666666666666667;
1167     const double C2 = .21132486540518711775;
1168     const double C3 = .25;
1169     const double C4 = .5;
1170     const double C5 = .62200846792814621559;
1171     const double C6 = .78867513459481288225;
1172    
1173 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1174 caltinay 3760 out.requireWrite();
1175 caltinay 3913 #pragma omp parallel
1176     {
1177     vector<double> f_000(numComp);
1178     vector<double> f_001(numComp);
1179     vector<double> f_010(numComp);
1180     vector<double> f_011(numComp);
1181     vector<double> f_100(numComp);
1182     vector<double> f_101(numComp);
1183     vector<double> f_110(numComp);
1184     vector<double> f_111(numComp);
1185     #pragma omp for
1186 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1187     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1188     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1189     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1197     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1198 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1199 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1200     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1201     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1202     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1203     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1204     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1205     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1206     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1207     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1208     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1209     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1210     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1211 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1212     o[INDEX3(i,1,0,numComp,3)] = V4;
1213     o[INDEX3(i,2,0,numComp,3)] = V8;
1214     o[INDEX3(i,0,1,numComp,3)] = V0;
1215     o[INDEX3(i,1,1,numComp,3)] = V5;
1216     o[INDEX3(i,2,1,numComp,3)] = V9;
1217     o[INDEX3(i,0,2,numComp,3)] = V1;
1218     o[INDEX3(i,1,2,numComp,3)] = V4;
1219     o[INDEX3(i,2,2,numComp,3)] = V10;
1220     o[INDEX3(i,0,3,numComp,3)] = V1;
1221     o[INDEX3(i,1,3,numComp,3)] = V5;
1222     o[INDEX3(i,2,3,numComp,3)] = V11;
1223     o[INDEX3(i,0,4,numComp,3)] = V2;
1224     o[INDEX3(i,1,4,numComp,3)] = V6;
1225     o[INDEX3(i,2,4,numComp,3)] = V8;
1226     o[INDEX3(i,0,5,numComp,3)] = V2;
1227     o[INDEX3(i,1,5,numComp,3)] = V7;
1228     o[INDEX3(i,2,5,numComp,3)] = V9;
1229     o[INDEX3(i,0,6,numComp,3)] = V3;
1230     o[INDEX3(i,1,6,numComp,3)] = V6;
1231     o[INDEX3(i,2,6,numComp,3)] = V10;
1232     o[INDEX3(i,0,7,numComp,3)] = V3;
1233     o[INDEX3(i,1,7,numComp,3)] = V7;
1234     o[INDEX3(i,2,7,numComp,3)] = V11;
1235     } // end of component loop i
1236     } // end of k0 loop
1237     } // end of k1 loop
1238     } // end of k2 loop
1239     } // end of parallel section
1240 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1241 caltinay 3760 out.requireWrite();
1242 caltinay 3913 #pragma omp parallel
1243     {
1244     vector<double> f_000(numComp);
1245     vector<double> f_001(numComp);
1246     vector<double> f_010(numComp);
1247     vector<double> f_011(numComp);
1248     vector<double> f_100(numComp);
1249     vector<double> f_101(numComp);
1250     vector<double> f_110(numComp);
1251     vector<double> f_111(numComp);
1252     #pragma omp for
1253 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1254     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1255     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1256     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1264     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1265 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1266 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1267     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1268     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1269 caltinay 3913 } // end of component loop i
1270     } // end of k0 loop
1271     } // end of k1 loop
1272     } // end of k2 loop
1273     } // end of parallel section
1274 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1275 caltinay 3760 out.requireWrite();
1276 caltinay 3722 #pragma omp parallel
1277     {
1278 caltinay 3913 vector<double> f_000(numComp);
1279     vector<double> f_001(numComp);
1280     vector<double> f_010(numComp);
1281     vector<double> f_011(numComp);
1282     vector<double> f_100(numComp);
1283     vector<double> f_101(numComp);
1284     vector<double> f_110(numComp);
1285     vector<double> f_111(numComp);
1286 caltinay 3722 if (m_faceOffset[0] > -1) {
1287     #pragma omp for nowait
1288 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1289     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1290     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1299 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1300 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1301     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1302     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1303     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1304     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1305 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1306     o[INDEX3(i,2,0,numComp,3)] = V2;
1307 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1308 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1309     o[INDEX3(i,2,1,numComp,3)] = V3;
1310 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1311 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1312     o[INDEX3(i,2,2,numComp,3)] = V2;
1313 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1314 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1315     o[INDEX3(i,2,3,numComp,3)] = V3;
1316 caltinay 3764 } // end of component loop i
1317     } // end of k1 loop
1318     } // end of k2 loop
1319     } // end of face 0
1320 caltinay 3722 if (m_faceOffset[1] > -1) {
1321     #pragma omp for nowait
1322 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1323     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1324     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1332     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1333 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1334 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1335     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1336     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1337     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1338     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1339 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1340     o[INDEX3(i,2,0,numComp,3)] = V2;
1341 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1342 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1343     o[INDEX3(i,2,1,numComp,3)] = V3;
1344 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1345 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1346     o[INDEX3(i,2,2,numComp,3)] = V2;
1347 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1348 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1349     o[INDEX3(i,2,3,numComp,3)] = V3;
1350 caltinay 3764 } // end of component loop i
1351     } // end of k1 loop
1352     } // end of k2 loop
1353     } // end of face 1
1354 caltinay 3722 if (m_faceOffset[2] > -1) {
1355     #pragma omp for nowait
1356 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1357     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1358     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1367 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1368 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1369     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1370     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1371 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1372 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1373 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1374     o[INDEX3(i,0,1,numComp,3)] = V0;
1375 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1376 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1377     o[INDEX3(i,0,2,numComp,3)] = V0;
1378 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1379 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1380     o[INDEX3(i,0,3,numComp,3)] = V0;
1381 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1382 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1383 caltinay 3764 } // end of component loop i
1384     } // end of k0 loop
1385     } // end of k2 loop
1386     } // end of face 2
1387 caltinay 3722 if (m_faceOffset[3] > -1) {
1388     #pragma omp for nowait
1389 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1390     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1391     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1400 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1401 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1402     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1403     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1404     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1405 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1406 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1407 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1408     o[INDEX3(i,0,1,numComp,3)] = V0;
1409 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1410 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1411     o[INDEX3(i,0,2,numComp,3)] = V1;
1412 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1413 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1414     o[INDEX3(i,0,3,numComp,3)] = V1;
1415 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1416 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1417 caltinay 3764 } // end of component loop i
1418     } // end of k0 loop
1419     } // end of k2 loop
1420     } // end of face 3
1421 caltinay 3722 if (m_faceOffset[4] > -1) {
1422     #pragma omp for nowait
1423 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1424     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1425     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1434 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1435 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1436     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1437     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1438     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1439 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1440     o[INDEX3(i,1,0,numComp,3)] = V2;
1441 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1442 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1443     o[INDEX3(i,1,1,numComp,3)] = V3;
1444 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1445 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1446     o[INDEX3(i,1,2,numComp,3)] = V2;
1447 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1448 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1449     o[INDEX3(i,1,3,numComp,3)] = V3;
1450 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1451 caltinay 3764 } // end of component loop i
1452     } // end of k0 loop
1453     } // end of k1 loop
1454     } // end of face 4
1455 caltinay 3722 if (m_faceOffset[5] > -1) {
1456     #pragma omp for nowait
1457 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1458     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1459     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1468 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1469 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1470     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1471     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1472     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1473 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1474     o[INDEX3(i,1,0,numComp,3)] = V2;
1475 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1476 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1477     o[INDEX3(i,1,1,numComp,3)] = V3;
1478 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1479 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1480     o[INDEX3(i,1,2,numComp,3)] = V2;
1481 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1482 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1483     o[INDEX3(i,1,3,numComp,3)] = V3;
1484 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1485 caltinay 3764 } // end of component loop i
1486     } // end of k0 loop
1487     } // end of k1 loop
1488     } // end of face 5
1489 caltinay 3722 } // end of parallel section
1490 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1491 caltinay 3760 out.requireWrite();
1492 caltinay 3722 #pragma omp parallel
1493     {
1494 caltinay 3913 vector<double> f_000(numComp);
1495     vector<double> f_001(numComp);
1496     vector<double> f_010(numComp);
1497     vector<double> f_011(numComp);
1498     vector<double> f_100(numComp);
1499     vector<double> f_101(numComp);
1500     vector<double> f_110(numComp);
1501     vector<double> f_111(numComp);
1502 caltinay 3722 if (m_faceOffset[0] > -1) {
1503     #pragma omp for nowait
1504 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1505     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1506     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1515 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1516 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1517     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1518     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1519 caltinay 3764 } // end of component loop i
1520     } // end of k1 loop
1521     } // end of k2 loop
1522     } // end of face 0
1523 caltinay 3722 if (m_faceOffset[1] > -1) {
1524     #pragma omp for nowait
1525 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1526     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1527     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1536 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1537 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1538     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1539     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1540 caltinay 3764 } // end of component