/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4633 - (hide annotations)
Tue Jan 28 01:04:17 2014 UTC (5 years, 2 months ago) by caltinay
Original Path: trunk/ripley/src/Brick.cpp
File size: 147414 byte(s)
Fixed failure for lazy data when interpolating.

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 caltinay 3691
16     #include <ripley/Brick.h>
17 caltinay 3791 #include <paso/SystemMatrix.h>
18 caltinay 4334 #include <esysUtils/esysFileWriter.h>
19 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
20 sshaw 4629 #include <ripley/WaveAssembler3D.h>
21 caltinay 4477 #include <boost/scoped_array.hpp>
22    
23 caltinay 4013 #ifdef USE_NETCDF
24     #include <netcdfcpp.h>
25     #endif
26    
27 caltinay 3691 #if USE_SILO
28     #include <silo.h>
29     #ifdef ESYS_MPI
30     #include <pmpio.h>
31     #endif
32     #endif
33    
34     #include <iomanip>
35    
36     using namespace std;
37 caltinay 4334 using esysUtils::FileWriter;
38 caltinay 3691
39     namespace ripley {
40    
41 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
42 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
43     const std::vector<double>& points, const std::vector<int>& tags,
44     const std::map<std::string, int>& tagnamestonums) :
45 caltinay 4334 RipleyDomain(3)
46 caltinay 3691 {
47 caltinay 3943 // ignore subdivision parameters for serial run
48     if (m_mpiInfo->size == 1) {
49     d0=1;
50     d1=1;
51     d2=1;
52     }
53     bool warn=false;
54     // if number of subdivisions is non-positive, try to subdivide by the same
55     // ratio as the number of elements
56     if (d0<=0 && d1<=0 && d2<=0) {
57     warn=true;
58 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
59 caltinay 4010 d0=max(1, d0);
60     d1=max(1, (int)(d0*n1/(float)n0));
61 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
62     if (d0*d1*d2 != m_mpiInfo->size) {
63     // ratios not the same so leave "smallest" side undivided and try
64     // dividing 2 sides only
65     if (n0>=n1) {
66     if (n1>=n2) {
67     d0=d1=0;
68     d2=1;
69     } else {
70     d0=d2=0;
71     d1=1;
72     }
73     } else {
74     if (n0>=n2) {
75     d0=d1=0;
76     d2=1;
77     } else {
78     d0=1;
79     d1=d2=0;
80     }
81     }
82     }
83     }
84     if (d0<=0 && d1<=0) {
85     warn=true;
86 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
87 caltinay 3943 d1=m_mpiInfo->size/d0;
88     if (d0*d1*d2 != m_mpiInfo->size) {
89     // ratios not the same so subdivide side with more elements only
90     if (n0>n1) {
91 caltinay 3944 d0=0;
92 caltinay 3943 d1=1;
93     } else {
94     d0=1;
95 caltinay 3944 d1=0;
96 caltinay 3943 }
97     }
98     } else if (d0<=0 && d2<=0) {
99     warn=true;
100 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
101 caltinay 3943 d2=m_mpiInfo->size/d0;
102     if (d0*d1*d2 != m_mpiInfo->size) {
103     // ratios not the same so subdivide side with more elements only
104     if (n0>n2) {
105 caltinay 3944 d0=0;
106 caltinay 3943 d2=1;
107     } else {
108     d0=1;
109 caltinay 3944 d2=0;
110 caltinay 3943 }
111     }
112     } else if (d1<=0 && d2<=0) {
113     warn=true;
114 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
115 caltinay 3943 d2=m_mpiInfo->size/d1;
116     if (d0*d1*d2 != m_mpiInfo->size) {
117     // ratios not the same so subdivide side with more elements only
118     if (n1>n2) {
119 caltinay 3944 d1=0;
120 caltinay 3943 d2=1;
121     } else {
122     d1=1;
123 caltinay 3944 d2=0;
124 caltinay 3943 }
125     }
126     }
127     if (d0<=0) {
128     // d1,d2 are preset, determine d0
129     d0=m_mpiInfo->size/(d1*d2);
130     } else if (d1<=0) {
131     // d0,d2 are preset, determine d1
132     d1=m_mpiInfo->size/(d0*d2);
133     } else if (d2<=0) {
134     // d0,d1 are preset, determine d2
135     d2=m_mpiInfo->size/(d0*d1);
136     }
137    
138 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
139     // among number of ranks
140 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
141 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
142    
143 caltinay 3943 if (warn) {
144     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
145     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
146     }
147 caltinay 3691
148 caltinay 4334 double l0 = x1-x0;
149     double l1 = y1-y0;
150     double l2 = z1-z0;
151     m_dx[0] = l0/n0;
152     m_dx[1] = l1/n1;
153     m_dx[2] = l2/n2;
154    
155     if ((n0+1)%d0 > 0) {
156 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
157 caltinay 4334 l0=m_dx[0]*n0;
158 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
159 caltinay 4334 << n0 << ", l0=" << l0 << endl;
160 caltinay 3943 }
161 caltinay 4334 if ((n1+1)%d1 > 0) {
162 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
163 caltinay 4334 l1=m_dx[1]*n1;
164 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
165 caltinay 4334 << n1 << ", l1=" << l1 << endl;
166 caltinay 3943 }
167 caltinay 4334 if ((n2+1)%d2 > 0) {
168 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
169 caltinay 4334 l2=m_dx[2]*n2;
170 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
171 caltinay 4334 << n2 << ", l2=" << l2 << endl;
172 caltinay 3943 }
173    
174 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
175 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
176    
177 caltinay 4334 m_gNE[0] = n0;
178     m_gNE[1] = n1;
179     m_gNE[2] = n2;
180     m_origin[0] = x0;
181     m_origin[1] = y0;
182     m_origin[2] = z0;
183     m_length[0] = l0;
184     m_length[1] = l1;
185     m_length[2] = l2;
186     m_NX[0] = d0;
187     m_NX[1] = d1;
188     m_NX[2] = d2;
189    
190 caltinay 3753 // local number of elements (including overlap)
191 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
192     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
193     m_NE[0]++;
194     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
195     m_ownNE[0]--;
196 caltinay 3764
197 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
198     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
199     m_NE[1]++;
200     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
201     m_ownNE[1]--;
202 caltinay 3764
203 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
204     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
205     m_NE[2]++;
206     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
207     m_ownNE[2]--;
208 caltinay 3753
209     // local number of nodes
210 caltinay 4334 m_NN[0] = m_NE[0]+1;
211     m_NN[1] = m_NE[1]+1;
212     m_NN[2] = m_NE[2]+1;
213 caltinay 3753
214 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
215 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
216     if (m_offset[0] > 0)
217     m_offset[0]--;
218     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
219     if (m_offset[1] > 0)
220     m_offset[1]--;
221     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
222     if (m_offset[2] > 0)
223     m_offset[2]--;
224 caltinay 3753
225 caltinay 3691 populateSampleIds();
226 caltinay 3756 createPattern();
227 sshaw 4622
228     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
229 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
230     i != tagnamestonums.end(); i++) {
231     setTagMap(i->first, i->second);
232     }
233 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
234 caltinay 3691 }
235    
236    
237     Brick::~Brick()
238     {
239 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
240     Paso_Connector_free(m_connector);
241 sshaw 4622 delete assembler;
242 caltinay 3691 }
243    
244     string Brick::getDescription() const
245     {
246     return "ripley::Brick";
247     }
248    
249     bool Brick::operator==(const AbstractDomain& other) const
250     {
251 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
252     if (o) {
253     return (RipleyDomain::operator==(other) &&
254 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
255     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
256     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
257     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
258 caltinay 3744 }
259 caltinay 3691
260     return false;
261     }
262    
263 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
264 caltinay 4618 const ReaderParameters& params) const
265 caltinay 4013 {
266     #ifdef USE_NETCDF
267     // check destination function space
268     int myN0, myN1, myN2;
269     if (out.getFunctionSpace().getTypeCode() == Nodes) {
270 caltinay 4334 myN0 = m_NN[0];
271     myN1 = m_NN[1];
272     myN2 = m_NN[2];
273 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
274     out.getFunctionSpace().getTypeCode() == ReducedElements) {
275 caltinay 4334 myN0 = m_NE[0];
276     myN1 = m_NE[1];
277     myN2 = m_NE[2];
278 caltinay 4013 } else
279     throw RipleyException("readNcGrid(): invalid function space for output data object");
280    
281 caltinay 4615 if (params.first.size() != 3)
282 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
283    
284 caltinay 4615 if (params.numValues.size() != 3)
285 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
286    
287 caltinay 4615 if (params.multiplier.size() != 3)
288 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
289 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
290     if (params.multiplier[i]<1)
291 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
292    
293 caltinay 4013 // check file existence and size
294     NcFile f(filename.c_str(), NcFile::ReadOnly);
295     if (!f.is_valid())
296     throw RipleyException("readNcGrid(): cannot open file");
297    
298     NcVar* var = f.get_var(varname.c_str());
299     if (!var)
300     throw RipleyException("readNcGrid(): invalid variable name");
301    
302     // TODO: rank>0 data support
303     const int numComp = out.getDataPointSize();
304     if (numComp > 1)
305     throw RipleyException("readNcGrid(): only scalar data supported");
306    
307     const int dims = var->num_dims();
308 caltinay 4477 boost::scoped_array<long> edges(var->edges());
309 caltinay 4013
310     // is this a slice of the data object (dims!=3)?
311     // note the expected ordering of edges (as in numpy: z,y,x)
312 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
313     params.numValues[1] > edges[1] ||
314     params.numValues[0] > edges[2]))
315     || (dims==2 && params.numValues[2]>1)
316     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
317 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
318     }
319    
320     // check if this rank contributes anything
321 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
322     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
323     params.first[1] >= m_offset[1]+myN1 ||
324     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
325     params.first[2] >= m_offset[2]+myN2 ||
326     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
327 caltinay 4013 return;
328     }
329    
330     // now determine how much this rank has to write
331    
332     // first coordinates in data object to write to
333 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
334     const int first1 = max(0, params.first[1]-m_offset[1]);
335     const int first2 = max(0, params.first[2]-m_offset[2]);
336 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
337     int idx0 = max(0, m_offset[0]-params.first[0]);
338     int idx1 = max(0, m_offset[1]-params.first[1]);
339     int idx2 = max(0, m_offset[2]-params.first[2]);
340 caltinay 4277 // number of values to read
341 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
342     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
343     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
344 caltinay 4013
345 caltinay 4618 // make sure we read the right block if going backwards through file
346     if (params.reverse[0])
347     idx0 = edges[dims-1]-num0-idx0;
348     if (dims>1 && params.reverse[1])
349     idx1 = edges[dims-2]-num1-idx1;
350     if (dims>2 && params.reverse[2])
351     idx2 = edges[dims-3]-num2-idx2;
352    
353    
354 caltinay 4013 vector<double> values(num0*num1*num2);
355     if (dims==3) {
356     var->set_cur(idx2, idx1, idx0);
357     var->get(&values[0], num2, num1, num0);
358     } else if (dims==2) {
359     var->set_cur(idx1, idx0);
360     var->get(&values[0], num1, num0);
361     } else {
362     var->set_cur(idx0);
363     var->get(&values[0], num0);
364     }
365    
366     const int dpp = out.getNumDataPointsPerSample();
367     out.requireWrite();
368    
369 caltinay 4618 // helpers for reversing
370     const int x0 = (params.reverse[0] ? num0-1 : 0);
371     const int x_mult = (params.reverse[0] ? -1 : 1);
372     const int y0 = (params.reverse[1] ? num1-1 : 0);
373     const int y_mult = (params.reverse[1] ? -1 : 1);
374     const int z0 = (params.reverse[2] ? num2-1 : 0);
375     const int z_mult = (params.reverse[2] ? -1 : 1);
376    
377 caltinay 4013 for (index_t z=0; z<num2; z++) {
378     for (index_t y=0; y<num1; y++) {
379     #pragma omp parallel for
380     for (index_t x=0; x<num0; x++) {
381 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
382     +(first1+y*params.multiplier[1])*myN0
383     +(first2+z*params.multiplier[2])*myN0*myN1;
384 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
385     +(y0+y_mult*y)*num0
386     +(x0+x_mult*x);
387 caltinay 4174 if (!isnan(values[srcIndex])) {
388 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
389     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
390     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
391 caltinay 4277 const int dataIndex = baseIndex+m0
392     +m1*myN0
393     +m2*myN0*myN1;
394     double* dest = out.getSampleDataRW(dataIndex);
395     for (index_t q=0; q<dpp; q++) {
396     *dest++ = values[srcIndex];
397     }
398     }
399     }
400 caltinay 4174 }
401 caltinay 4013 }
402     }
403     }
404     }
405     #else
406     throw RipleyException("readNcGrid(): not compiled with netCDF support");
407     #endif
408     }
409    
410 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
411 caltinay 4618 const ReaderParameters& params) const
412 caltinay 4334 {
413 caltinay 4495 // the mapping is not universally correct but should work on our
414     // supported platforms
415 caltinay 4615 switch (params.dataType) {
416 caltinay 4495 case DATATYPE_INT32:
417 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
418 caltinay 4495 break;
419     case DATATYPE_FLOAT32:
420 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
421 caltinay 4495 break;
422     case DATATYPE_FLOAT64:
423 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
424 caltinay 4495 break;
425     default:
426     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
427     }
428     }
429    
430     template<typename ValueType>
431     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
432 caltinay 4618 const ReaderParameters& params) const
433 caltinay 4495 {
434 caltinay 4334 // check destination function space
435     int myN0, myN1, myN2;
436     if (out.getFunctionSpace().getTypeCode() == Nodes) {
437     myN0 = m_NN[0];
438     myN1 = m_NN[1];
439     myN2 = m_NN[2];
440     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
441     out.getFunctionSpace().getTypeCode() == ReducedElements) {
442     myN0 = m_NE[0];
443     myN1 = m_NE[1];
444     myN2 = m_NE[2];
445     } else
446     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
447    
448 caltinay 4615 if (params.first.size() != 3)
449 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
450    
451 caltinay 4615 if (params.numValues.size() != 3)
452 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
453    
454 caltinay 4615 if (params.multiplier.size() != 3)
455 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
456 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
457     if (params.multiplier[i]<1)
458 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
459    
460     // check file existence and size
461     ifstream f(filename.c_str(), ifstream::binary);
462     if (f.fail()) {
463     throw RipleyException("readBinaryGrid(): cannot open file");
464     }
465     f.seekg(0, ios::end);
466     const int numComp = out.getDataPointSize();
467     const int filesize = f.tellg();
468 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
469 caltinay 4334 if (filesize < reqsize) {
470     f.close();
471     throw RipleyException("readBinaryGrid(): not enough data in file");
472     }
473    
474     // check if this rank contributes anything
475 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
476     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
477     params.first[1] >= m_offset[1]+myN1 ||
478     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
479     params.first[2] >= m_offset[2]+myN2 ||
480     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
481 caltinay 4334 f.close();
482     return;
483     }
484    
485     // now determine how much this rank has to write
486    
487     // first coordinates in data object to write to
488 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
489     const int first1 = max(0, params.first[1]-m_offset[1]);
490     const int first2 = max(0, params.first[2]-m_offset[2]);
491 caltinay 4334 // indices to first value in file
492 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
493     const int idx1 = max(0, m_offset[1]-params.first[1]);
494     const int idx2 = max(0, m_offset[2]-params.first[2]);
495 caltinay 4334 // number of values to read
496 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
497     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
498     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
499 caltinay 4334
500     out.requireWrite();
501 caltinay 4495 vector<ValueType> values(num0*numComp);
502 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
503    
504 caltinay 4529 for (int z=0; z<num2; z++) {
505     for (int y=0; y<num1; y++) {
506 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
507     +(idx2+z)*params.numValues[0]*params.numValues[1]);
508 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
509     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
510 caltinay 4334
511 caltinay 4529 for (int x=0; x<num0; x++) {
512 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
513     +(first1+y*params.multiplier[1])*myN0
514     +(first2+z*params.multiplier[2])*myN0*myN1;
515     for (int m2=0; m2<params.multiplier[2]; m2++) {
516     for (int m1=0; m1<params.multiplier[1]; m1++) {
517     for (int m0=0; m0<params.multiplier[0]; m0++) {
518 caltinay 4334 const int dataIndex = baseIndex+m0
519     +m1*myN0
520     +m2*myN0*myN1;
521     double* dest = out.getSampleDataRW(dataIndex);
522 caltinay 4529 for (int c=0; c<numComp; c++) {
523     ValueType val = values[x*numComp+c];
524    
525 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
526 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
527     // this will alter val!!
528     byte_swap32(cval);
529     }
530     if (!std::isnan(val)) {
531     for (int q=0; q<dpp; q++) {
532     *dest++ = static_cast<double>(val);
533 caltinay 4334 }
534     }
535     }
536     }
537     }
538     }
539     }
540     }
541     }
542    
543     f.close();
544     }
545    
546 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
547     int byteOrder, int dataType) const
548 caltinay 4334 {
549 caltinay 4357 // the mapping is not universally correct but should work on our
550     // supported platforms
551     switch (dataType) {
552     case DATATYPE_INT32:
553     writeBinaryGridImpl<int>(in, filename, byteOrder);
554     break;
555     case DATATYPE_FLOAT32:
556     writeBinaryGridImpl<float>(in, filename, byteOrder);
557     break;
558     case DATATYPE_FLOAT64:
559     writeBinaryGridImpl<double>(in, filename, byteOrder);
560     break;
561     default:
562     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
563     }
564     }
565    
566     template<typename ValueType>
567     void Brick::writeBinaryGridImpl(const escript::Data& in,
568     const string& filename, int byteOrder) const
569     {
570 caltinay 4334 // check function space and determine number of points
571     int myN0, myN1, myN2;
572     int totalN0, totalN1, totalN2;
573     if (in.getFunctionSpace().getTypeCode() == Nodes) {
574     myN0 = m_NN[0];
575     myN1 = m_NN[1];
576     myN2 = m_NN[2];
577     totalN0 = m_gNE[0]+1;
578     totalN1 = m_gNE[1]+1;
579     totalN2 = m_gNE[2]+1;
580     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
581     in.getFunctionSpace().getTypeCode() == ReducedElements) {
582     myN0 = m_NE[0];
583     myN1 = m_NE[1];
584     myN2 = m_NE[2];
585     totalN0 = m_gNE[0];
586     totalN1 = m_gNE[1];
587     totalN2 = m_gNE[2];
588     } else
589     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
590    
591     const int numComp = in.getDataPointSize();
592     const int dpp = in.getNumDataPointsPerSample();
593 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
594 caltinay 4334
595     if (numComp > 1 || dpp > 1)
596     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
597    
598     // from here on we know that each sample consists of one value
599 caltinay 4482 FileWriter fw;
600     fw.openFile(filename, fileSize);
601 caltinay 4334 MPIBarrier();
602    
603     for (index_t z=0; z<myN2; z++) {
604     for (index_t y=0; y<myN1; y++) {
605     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
606 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
607 caltinay 4334 ostringstream oss;
608    
609     for (index_t x=0; x<myN0; x++) {
610 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
611 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
612     if (byteOrder == BYTEORDER_NATIVE) {
613 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
614     } else {
615     char* value = reinterpret_cast<char*>(&fvalue);
616 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
617 caltinay 4334 }
618     }
619 caltinay 4482 fw.writeAt(oss, fileofs);
620 caltinay 4334 }
621     }
622 caltinay 4482 fw.close();
623 caltinay 4334 }
624    
625 caltinay 3691 void Brick::dump(const string& fileName) const
626     {
627     #if USE_SILO
628     string fn(fileName);
629     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
630     fn+=".silo";
631     }
632    
633     int driver=DB_HDF5;
634     string siloPath;
635     DBfile* dbfile = NULL;
636    
637     #ifdef ESYS_MPI
638     PMPIO_baton_t* baton = NULL;
639 gross 3793 const int NUM_SILO_FILES = 1;
640     const char* blockDirFmt = "/block%04d";
641 caltinay 3691 #endif
642    
643     if (m_mpiInfo->size > 1) {
644     #ifdef ESYS_MPI
645     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
646     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
647     PMPIO_DefaultClose, (void*)&driver);
648     // try the fallback driver in case of error
649     if (!baton && driver != DB_PDB) {
650     driver = DB_PDB;
651     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
652     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
653     PMPIO_DefaultClose, (void*)&driver);
654     }
655     if (baton) {
656     char str[64];
657     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
658     siloPath = str;
659     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
660     }
661     #endif
662     } else {
663     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
664     getDescription().c_str(), driver);
665     // try the fallback driver in case of error
666     if (!dbfile && driver != DB_PDB) {
667     driver = DB_PDB;
668     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669     getDescription().c_str(), driver);
670     }
671     }
672    
673     if (!dbfile)
674     throw RipleyException("dump: Could not create Silo file");
675    
676     /*
677     if (driver==DB_HDF5) {
678     // gzip level 1 already provides good compression with minimal
679     // performance penalty. Some tests showed that gzip levels >3 performed
680     // rather badly on escript data both in terms of time and space
681     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
682     }
683     */
684    
685 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
686     boost::scoped_ptr<double> y(new double[m_NN[1]]);
687     boost::scoped_ptr<double> z(new double[m_NN[2]]);
688 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
689     #pragma omp parallel
690     {
691     #pragma omp for
692 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
693     coords[0][i0]=getLocalCoordinate(i0, 0);
694 caltinay 3691 }
695     #pragma omp for
696 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
697     coords[1][i1]=getLocalCoordinate(i1, 1);
698 caltinay 3691 }
699     #pragma omp for
700 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
701     coords[2][i2]=getLocalCoordinate(i2, 2);
702 caltinay 3691 }
703     }
704 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
705    
706     // write mesh
707     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
708 caltinay 3691 DB_COLLINEAR, NULL);
709    
710 caltinay 4334 // write node ids
711     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
712 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
713 caltinay 3691
714 caltinay 3698 // write element ids
715 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
716 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
717 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
718 caltinay 3698
719     // rank 0 writes multimesh and multivar
720 caltinay 3691 if (m_mpiInfo->rank == 0) {
721     vector<string> tempstrings;
722     vector<char*> names;
723     for (dim_t i=0; i<m_mpiInfo->size; i++) {
724     stringstream path;
725     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
726     tempstrings.push_back(path.str());
727     names.push_back((char*)tempstrings.back().c_str());
728     }
729     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
730     DBSetDir(dbfile, "/");
731     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
732     &types[0], NULL);
733     tempstrings.clear();
734     names.clear();
735     for (dim_t i=0; i<m_mpiInfo->size; i++) {
736     stringstream path;
737     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
738     tempstrings.push_back(path.str());
739     names.push_back((char*)tempstrings.back().c_str());
740     }
741     types.assign(m_mpiInfo->size, DB_QUADVAR);
742     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
743     &types[0], NULL);
744 caltinay 3698 tempstrings.clear();
745     names.clear();
746     for (dim_t i=0; i<m_mpiInfo->size; i++) {
747     stringstream path;
748     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
749     tempstrings.push_back(path.str());
750     names.push_back((char*)tempstrings.back().c_str());
751     }
752     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
753     &types[0], NULL);
754 caltinay 3691 }
755    
756     if (m_mpiInfo->size > 1) {
757     #ifdef ESYS_MPI
758     PMPIO_HandOffBaton(baton, dbfile);
759     PMPIO_Finish(baton);
760     #endif
761     } else {
762     DBClose(dbfile);
763     }
764    
765     #else // USE_SILO
766 caltinay 3791 throw RipleyException("dump: no Silo support");
767 caltinay 3691 #endif
768     }
769    
770     const int* Brick::borrowSampleReferenceIDs(int fsType) const
771     {
772 caltinay 3697 switch (fsType) {
773     case Nodes:
774 caltinay 3748 case ReducedNodes: //FIXME: reduced
775 caltinay 3753 return &m_nodeId[0];
776 caltinay 3757 case DegreesOfFreedom:
777     case ReducedDegreesOfFreedom: //FIXME: reduced
778 caltinay 3753 return &m_dofId[0];
779 caltinay 3697 case Elements:
780 caltinay 3733 case ReducedElements:
781 caltinay 3697 return &m_elementId[0];
782 caltinay 3757 case FaceElements:
783 caltinay 3733 case ReducedFaceElements:
784 caltinay 3697 return &m_faceId[0];
785     default:
786     break;
787     }
788 caltinay 3691
789 caltinay 3697 stringstream msg;
790 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
791 caltinay 3697 throw RipleyException(msg.str());
792 caltinay 3691 }
793    
794 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
795 caltinay 3691 {
796 caltinay 3759 if (getMPISize()==1)
797     return true;
798    
799 caltinay 3757 switch (fsType) {
800     case Nodes:
801     case ReducedNodes: //FIXME: reduced
802     return (m_dofMap[id] < getNumDOF());
803     case DegreesOfFreedom:
804     case ReducedDegreesOfFreedom:
805     return true;
806     case Elements:
807     case ReducedElements:
808     {
809     // check ownership of element's _last_ node
810 caltinay 4334 const index_t x=id%m_NE[0] + 1;
811     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
812     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
813     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
814 caltinay 3757 }
815     case FaceElements:
816     case ReducedFaceElements:
817 caltinay 3759 {
818     // check ownership of face element's last node
819     dim_t n=0;
820 caltinay 4334 for (size_t i=0; i<6; i++) {
821     n+=m_faceCount[i];
822 caltinay 3759 if (id<n) {
823 caltinay 4334 const index_t j=id-n+m_faceCount[i];
824 caltinay 3759 if (i>=4) { // front or back
825 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
826     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
827 caltinay 3759 } else if (i>=2) { // bottom or top
828 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
829     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
830 caltinay 3759 } else { // left or right
831 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
832     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
833 caltinay 3759 }
834     }
835     }
836     return false;
837     }
838 caltinay 3757 default:
839     break;
840 caltinay 3753 }
841 caltinay 3757
842     stringstream msg;
843 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
844 caltinay 3757 throw RipleyException(msg.str());
845 caltinay 3691 }
846    
847 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
848 caltinay 3703 {
849 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
850     out.requireWrite();
851     #pragma omp parallel
852     {
853     if (m_faceOffset[0] > -1) {
854     #pragma omp for nowait
855 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
856     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
857     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
858 caltinay 3764 // set vector at four quadrature points
859     *o++ = -1.; *o++ = 0.; *o++ = 0.;
860     *o++ = -1.; *o++ = 0.; *o++ = 0.;
861     *o++ = -1.; *o++ = 0.; *o++ = 0.;
862     *o++ = -1.; *o++ = 0.; *o = 0.;
863     }
864     }
865     }
866    
867     if (m_faceOffset[1] > -1) {
868     #pragma omp for nowait
869 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
870     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
871     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
872 caltinay 3764 // set vector at four quadrature points
873     *o++ = 1.; *o++ = 0.; *o++ = 0.;
874     *o++ = 1.; *o++ = 0.; *o++ = 0.;
875     *o++ = 1.; *o++ = 0.; *o++ = 0.;
876     *o++ = 1.; *o++ = 0.; *o = 0.;
877     }
878     }
879     }
880    
881     if (m_faceOffset[2] > -1) {
882     #pragma omp for nowait
883 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
884     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
885     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
886 caltinay 3764 // set vector at four quadrature points
887     *o++ = 0.; *o++ = -1.; *o++ = 0.;
888     *o++ = 0.; *o++ = -1.; *o++ = 0.;
889     *o++ = 0.; *o++ = -1.; *o++ = 0.;
890     *o++ = 0.; *o++ = -1.; *o = 0.;
891     }
892     }
893     }
894    
895     if (m_faceOffset[3] > -1) {
896     #pragma omp for nowait
897 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
898     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
899     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
900 caltinay 3764 // set vector at four quadrature points
901     *o++ = 0.; *o++ = 1.; *o++ = 0.;
902     *o++ = 0.; *o++ = 1.; *o++ = 0.;
903     *o++ = 0.; *o++ = 1.; *o++ = 0.;
904     *o++ = 0.; *o++ = 1.; *o = 0.;
905     }
906     }
907     }
908    
909     if (m_faceOffset[4] > -1) {
910     #pragma omp for nowait
911 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
912     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
913     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
914 caltinay 3764 // set vector at four quadrature points
915     *o++ = 0.; *o++ = 0.; *o++ = -1.;
916     *o++ = 0.; *o++ = 0.; *o++ = -1.;
917     *o++ = 0.; *o++ = 0.; *o++ = -1.;
918     *o++ = 0.; *o++ = 0.; *o = -1.;
919     }
920     }
921     }
922    
923     if (m_faceOffset[5] > -1) {
924     #pragma omp for nowait
925 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
926     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
927     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
928 caltinay 3764 // set vector at four quadrature points
929     *o++ = 0.; *o++ = 0.; *o++ = 1.;
930     *o++ = 0.; *o++ = 0.; *o++ = 1.;
931     *o++ = 0.; *o++ = 0.; *o++ = 1.;
932     *o++ = 0.; *o++ = 0.; *o = 1.;
933     }
934     }
935     }
936     } // end of parallel section
937     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
938     out.requireWrite();
939     #pragma omp parallel
940     {
941     if (m_faceOffset[0] > -1) {
942     #pragma omp for nowait
943 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
944     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
945     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
946 caltinay 3764 *o++ = -1.;
947     *o++ = 0.;
948     *o = 0.;
949     }
950     }
951     }
952    
953     if (m_faceOffset[1] > -1) {
954     #pragma omp for nowait
955 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
956     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
957     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
958 caltinay 3764 *o++ = 1.;
959     *o++ = 0.;
960     *o = 0.;
961     }
962     }
963     }
964    
965     if (m_faceOffset[2] > -1) {
966     #pragma omp for nowait
967 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
968     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
969     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
970 caltinay 3764 *o++ = 0.;
971     *o++ = -1.;
972     *o = 0.;
973     }
974     }
975     }
976    
977     if (m_faceOffset[3] > -1) {
978     #pragma omp for nowait
979 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
980     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
981     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
982 caltinay 3764 *o++ = 0.;
983     *o++ = 1.;
984     *o = 0.;
985     }
986     }
987     }
988    
989     if (m_faceOffset[4] > -1) {
990     #pragma omp for nowait
991 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
992     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
993     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
994 caltinay 3764 *o++ = 0.;
995     *o++ = 0.;
996     *o = -1.;
997     }
998     }
999     }
1000    
1001     if (m_faceOffset[5] > -1) {
1002     #pragma omp for nowait
1003 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1004     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1005     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1006 caltinay 3764 *o++ = 0.;
1007     *o++ = 0.;
1008     *o = 1.;
1009     }
1010     }
1011     }
1012     } // end of parallel section
1013    
1014     } else {
1015     stringstream msg;
1016 caltinay 3791 msg << "setToNormal: invalid function space type "
1017     << out.getFunctionSpace().getTypeCode();
1018 caltinay 3764 throw RipleyException(msg.str());
1019     }
1020     }
1021    
1022     void Brick::setToSize(escript::Data& out) const
1023     {
1024     if (out.getFunctionSpace().getTypeCode() == Elements
1025     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1026     out.requireWrite();
1027     const dim_t numQuad=out.getNumDataPointsPerSample();
1028 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1029 caltinay 3764 #pragma omp parallel for
1030     for (index_t k = 0; k < getNumElements(); ++k) {
1031     double* o = out.getSampleDataRW(k);
1032     fill(o, o+numQuad, size);
1033     }
1034     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1035     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1036     out.requireWrite();
1037     const dim_t numQuad=out.getNumDataPointsPerSample();
1038     #pragma omp parallel
1039     {
1040     if (m_faceOffset[0] > -1) {
1041 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1042 caltinay 3764 #pragma omp for nowait
1043 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1044     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1045     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1046 caltinay 3764 fill(o, o+numQuad, size);
1047     }
1048     }
1049     }
1050    
1051     if (m_faceOffset[1] > -1) {
1052 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1053 caltinay 3764 #pragma omp for nowait
1054 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1055     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1056     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1057 caltinay 3764 fill(o, o+numQuad, size);
1058     }
1059     }
1060     }
1061    
1062     if (m_faceOffset[2] > -1) {
1063 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1064 caltinay 3764 #pragma omp for nowait
1065 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1066     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1067     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1068 caltinay 3764 fill(o, o+numQuad, size);
1069     }
1070     }
1071     }
1072    
1073     if (m_faceOffset[3] > -1) {
1074 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1075 caltinay 3764 #pragma omp for nowait
1076 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1077     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1078     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1079 caltinay 3764 fill(o, o+numQuad, size);
1080     }
1081     }
1082     }
1083    
1084     if (m_faceOffset[4] > -1) {
1085 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1086 caltinay 3764 #pragma omp for nowait
1087 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1088     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1089     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1090 caltinay 3764 fill(o, o+numQuad, size);
1091     }
1092     }
1093     }
1094    
1095     if (m_faceOffset[5] > -1) {
1096 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1097 caltinay 3764 #pragma omp for nowait
1098 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1099     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1100     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1101 caltinay 3764 fill(o, o+numQuad, size);
1102     }
1103     }
1104     }
1105     } // end of parallel section
1106    
1107     } else {
1108     stringstream msg;
1109 caltinay 3791 msg << "setToSize: invalid function space type "
1110     << out.getFunctionSpace().getTypeCode();
1111 caltinay 3764 throw RipleyException(msg.str());
1112     }
1113     }
1114    
1115     void Brick::Print_Mesh_Info(const bool full) const
1116     {
1117     RipleyDomain::Print_Mesh_Info(full);
1118     if (full) {
1119     cout << " Id Coordinates" << endl;
1120     cout.precision(15);
1121     cout.setf(ios::scientific, ios::floatfield);
1122     for (index_t i=0; i < getNumNodes(); i++) {
1123     cout << " " << setw(5) << m_nodeId[i]
1124 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1125     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1126     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1127 caltinay 3764 }
1128     }
1129     }
1130    
1131    
1132     //protected
1133     void Brick::assembleCoordinates(escript::Data& arg) const
1134     {
1135     escriptDataC x = arg.getDataC();
1136     int numDim = m_numDim;
1137     if (!isDataPointShapeEqual(&x, 1, &numDim))
1138     throw RipleyException("setToX: Invalid Data object shape");
1139     if (!numSamplesEqual(&x, 1, getNumNodes()))
1140     throw RipleyException("setToX: Illegal number of samples in Data object");
1141    
1142     arg.requireWrite();
1143     #pragma omp parallel for
1144 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1145     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1146     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1147     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1148     point[0] = getLocalCoordinate(i0, 0);
1149     point[1] = getLocalCoordinate(i1, 1);
1150     point[2] = getLocalCoordinate(i2, 2);
1151 caltinay 3764 }
1152     }
1153     }
1154     }
1155    
1156     //protected
1157 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1158 caltinay 3764 {
1159 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1160 caltinay 3731 const double C0 = .044658198738520451079;
1161     const double C1 = .16666666666666666667;
1162     const double C2 = .21132486540518711775;
1163     const double C3 = .25;
1164     const double C4 = .5;
1165     const double C5 = .62200846792814621559;
1166     const double C6 = .78867513459481288225;
1167    
1168 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1169 caltinay 3760 out.requireWrite();
1170 caltinay 3913 #pragma omp parallel
1171     {
1172     vector<double> f_000(numComp);
1173     vector<double> f_001(numComp);
1174     vector<double> f_010(numComp);
1175     vector<double> f_011(numComp);
1176     vector<double> f_100(numComp);
1177     vector<double> f_101(numComp);
1178     vector<double> f_110(numComp);
1179     vector<double> f_111(numComp);
1180     #pragma omp for
1181 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1182     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1183     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1184     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1185     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1186     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1187     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1188     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1189     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1193 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1194 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1195     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1196     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1197     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1198     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1199     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1200     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1201     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1202     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1203     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1204     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1205     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1206 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1207     o[INDEX3(i,1,0,numComp,3)] = V4;
1208     o[INDEX3(i,2,0,numComp,3)] = V8;
1209     o[INDEX3(i,0,1,numComp,3)] = V0;
1210     o[INDEX3(i,1,1,numComp,3)] = V5;
1211     o[INDEX3(i,2,1,numComp,3)] = V9;
1212     o[INDEX3(i,0,2,numComp,3)] = V1;
1213     o[INDEX3(i,1,2,numComp,3)] = V4;
1214     o[INDEX3(i,2,2,numComp,3)] = V10;
1215     o[INDEX3(i,0,3,numComp,3)] = V1;
1216     o[INDEX3(i,1,3,numComp,3)] = V5;
1217     o[INDEX3(i,2,3,numComp,3)] = V11;
1218     o[INDEX3(i,0,4,numComp,3)] = V2;
1219     o[INDEX3(i,1,4,numComp,3)] = V6;
1220     o[INDEX3(i,2,4,numComp,3)] = V8;
1221     o[INDEX3(i,0,5,numComp,3)] = V2;
1222     o[INDEX3(i,1,5,numComp,3)] = V7;
1223     o[INDEX3(i,2,5,numComp,3)] = V9;
1224     o[INDEX3(i,0,6,numComp,3)] = V3;
1225     o[INDEX3(i,1,6,numComp,3)] = V6;
1226     o[INDEX3(i,2,6,numComp,3)] = V10;
1227     o[INDEX3(i,0,7,numComp,3)] = V3;
1228     o[INDEX3(i,1,7,numComp,3)] = V7;
1229     o[INDEX3(i,2,7,numComp,3)] = V11;
1230     } // end of component loop i
1231     } // end of k0 loop
1232     } // end of k1 loop
1233     } // end of k2 loop
1234     } // end of parallel section
1235 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1236 caltinay 3760 out.requireWrite();
1237 caltinay 3913 #pragma omp parallel
1238     {
1239     vector<double> f_000(numComp);
1240     vector<double> f_001(numComp);
1241     vector<double> f_010(numComp);
1242     vector<double> f_011(numComp);
1243     vector<double> f_100(numComp);
1244     vector<double> f_101(numComp);
1245     vector<double> f_110(numComp);
1246     vector<double> f_111(numComp);
1247     #pragma omp for
1248 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1249     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1250     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1251     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1252     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1253     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1254     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1255     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1260 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1261 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1262     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1263     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1264 caltinay 3913 } // end of component loop i
1265     } // end of k0 loop
1266     } // end of k1 loop
1267     } // end of k2 loop
1268     } // end of parallel section
1269 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1270 caltinay 3760 out.requireWrite();
1271 caltinay 3722 #pragma omp parallel
1272     {
1273 caltinay 3913 vector<double> f_000(numComp);
1274     vector<double> f_001(numComp);
1275     vector<double> f_010(numComp);
1276     vector<double> f_011(numComp);
1277     vector<double> f_100(numComp);
1278     vector<double> f_101(numComp);
1279     vector<double> f_110(numComp);
1280     vector<double> f_111(numComp);
1281 caltinay 3722 if (m_faceOffset[0] > -1) {
1282     #pragma omp for nowait
1283 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1284     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1285     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1286     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1287     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1288     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1289     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1294 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1295 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1296     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1297     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1298     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1299     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1300 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1301     o[INDEX3(i,2,0,numComp,3)] = V2;
1302 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1303 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1304     o[INDEX3(i,2,1,numComp,3)] = V3;
1305 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1306 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1307     o[INDEX3(i,2,2,numComp,3)] = V2;
1308 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1309 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1310     o[INDEX3(i,2,3,numComp,3)] = V3;
1311 caltinay 3764 } // end of component loop i
1312     } // end of k1 loop
1313     } // end of k2 loop
1314     } // end of face 0
1315 caltinay 3722 if (m_faceOffset[1] > -1) {
1316     #pragma omp for nowait
1317 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1318     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1319     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1320     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1321     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1322     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1323     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1328 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1329 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1330     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1331     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1332     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1333     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1334 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1335     o[INDEX3(i,2,0,numComp,3)] = V2;
1336 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1337 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1338     o[INDEX3(i,2,1,numComp,3)] = V3;
1339 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1340 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1341     o[INDEX3(i,2,2,numComp,3)] = V2;
1342 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1343 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1344     o[INDEX3(i,2,3,numComp,3)] = V3;
1345 caltinay 3764 } // end of component loop i
1346     } // end of k1 loop
1347     } // end of k2 loop
1348     } // end of face 1
1349 caltinay 3722 if (m_faceOffset[2] > -1) {
1350     #pragma omp for nowait
1351 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1352     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1353     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1354     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1355     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1356     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1357     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1362 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1363 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1364     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1365     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1366 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1367 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1368 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1369     o[INDEX3(i,0,1,numComp,3)] = V0;
1370 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1371 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1372     o[INDEX3(i,0,2,numComp,3)] = V0;
1373 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1374 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1375     o[INDEX3(i,0,3,numComp,3)] = V0;
1376 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1377 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1378 caltinay 3764 } // end of component loop i
1379     } // end of k0 loop
1380     } // end of k2 loop
1381     } // end of face 2
1382 caltinay 3722 if (m_faceOffset[3] > -1) {
1383     #pragma omp for nowait
1384 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1385     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1386     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1387     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1388     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1389     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1390     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1395 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1396 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1397     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1398     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1399     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1400 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1401 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1402 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1403     o[INDEX3(i,0,1,numComp,3)] = V0;
1404 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1405 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1406     o[INDEX3(i,0,2,numComp,3)] = V1;
1407 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1408 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1409     o[INDEX3(i,0,3,numComp,3)] = V1;
1410 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1411 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1412 caltinay 3764 } // end of component loop i
1413     } // end of k0 loop
1414     } // end of k2 loop
1415     } // end of face 3
1416 caltinay 3722 if (m_faceOffset[4] > -1) {
1417     #pragma omp for nowait
1418 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1419     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1420     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1421     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1422     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1423     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1424     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1429 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1430 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1431     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1432     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1433     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1434 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1435     o[INDEX3(i,1,0,numComp,3)] = V2;
1436 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1437 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1438     o[INDEX3(i,1,1,numComp,3)] = V3;
1439 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1440 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1441     o[INDEX3(i,1,2,numComp,3)] = V2;
1442 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1443 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1444     o[INDEX3(i,1,3,numComp,3)] = V3;
1445 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1446 caltinay 3764 } // end of component loop i
1447     } // end of k0 loop
1448     } // end of k1 loop
1449     } // end of face 4
1450 caltinay 3722 if (m_faceOffset[5] > -1) {
1451     #pragma omp for nowait
1452 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1453     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1454     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1455     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1456     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1457     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1458     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1459     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1463 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1464 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1465     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1466     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1467     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1468 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1469     o[INDEX3(i,1,0,numComp,3)] = V2;
1470 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1471 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1472     o[INDEX3(i,1,1,numComp,3)] = V3;
1473 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1474 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1475     o[INDEX3(i,1,2,numComp,3)] = V2;
1476 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1477 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1478     o[INDEX3(i,1,3,numComp,3)] = V3;
1479 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1480 caltinay 3764 } // end of component loop i
1481     } // end of k0 loop
1482     } // end of k1 loop
1483     } // end of face 5
1484 caltinay 3722 } // end of parallel section
1485 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1486 caltinay 3760 out.requireWrite();
1487 caltinay 3722 #pragma omp parallel
1488     {
1489 caltinay 3913 vector<double> f_000(numComp);
1490     vector<double> f_001(numComp);
1491     vector<double> f_010(numComp);
1492     vector<double> f_011(numComp);
1493     vector<double> f_100(numComp);
1494     vector<double> f_101(numComp);
1495     vector<double> f_110(numComp);
1496     vector<double> f_111(numComp);
1497 caltinay 3722 if (m_faceOffset[0] > -1) {
1498     #pragma omp for nowait
1499 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1500     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1501     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1502     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1503     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1504     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1505     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1510 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1511 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1512     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1513     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1514 caltinay 3764 } // end of component loop i
1515     } // end of k1 loop
1516     } // end of k2 loop
1517     } // end of face 0
1518 caltinay 3722 if (m_faceOffset[1] > -1) {
1519     #pragma omp for nowait
1520 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1521     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1522     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1523     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1524     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1525     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1526     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1527     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1531 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1532 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1533     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1534     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1535 caltinay 3764 } // end of component loop i
1536     } // end of k1 loop
1537     } // end of k2 loop
1538     } // end of face 1
1539 caltinay 3722 if (m_faceOffset[2] > -1) {<