/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4653 - (hide annotations)
Wed Feb 5 05:20:41 2014 UTC (5 years, 2 months ago) by jfenwick
File size: 154353 byte(s)
Made index chars unsigned to prevent warnings
1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 caltinay 3691
16     #include <ripley/Brick.h>
17 caltinay 3791 #include <paso/SystemMatrix.h>
18 caltinay 4334 #include <esysUtils/esysFileWriter.h>
19 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
20 sshaw 4629 #include <ripley/WaveAssembler3D.h>
21 caltinay 4477 #include <boost/scoped_array.hpp>
22    
23 caltinay 4013 #ifdef USE_NETCDF
24     #include <netcdfcpp.h>
25     #endif
26    
27 caltinay 3691 #if USE_SILO
28     #include <silo.h>
29     #ifdef ESYS_MPI
30     #include <pmpio.h>
31     #endif
32     #endif
33    
34     #include <iomanip>
35    
36 jfenwick 4650 #include "esysUtils/EsysRandom.h"
37     #include "blocktools.h"
38    
39    
40 caltinay 3691 using namespace std;
41 caltinay 4334 using esysUtils::FileWriter;
42 caltinay 3691
43     namespace ripley {
44    
45 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
46 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
47     const std::vector<double>& points, const std::vector<int>& tags,
48 jfenwick 4638 const simap_t& tagnamestonums) :
49 caltinay 4334 RipleyDomain(3)
50 caltinay 3691 {
51 caltinay 3943 // ignore subdivision parameters for serial run
52     if (m_mpiInfo->size == 1) {
53     d0=1;
54     d1=1;
55     d2=1;
56     }
57     bool warn=false;
58     // if number of subdivisions is non-positive, try to subdivide by the same
59     // ratio as the number of elements
60     if (d0<=0 && d1<=0 && d2<=0) {
61     warn=true;
62 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
63 caltinay 4010 d0=max(1, d0);
64     d1=max(1, (int)(d0*n1/(float)n0));
65 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
66     if (d0*d1*d2 != m_mpiInfo->size) {
67     // ratios not the same so leave "smallest" side undivided and try
68     // dividing 2 sides only
69     if (n0>=n1) {
70     if (n1>=n2) {
71     d0=d1=0;
72     d2=1;
73     } else {
74     d0=d2=0;
75     d1=1;
76     }
77     } else {
78     if (n0>=n2) {
79     d0=d1=0;
80     d2=1;
81     } else {
82     d0=1;
83     d1=d2=0;
84     }
85     }
86     }
87     }
88     if (d0<=0 && d1<=0) {
89     warn=true;
90 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
91 caltinay 3943 d1=m_mpiInfo->size/d0;
92     if (d0*d1*d2 != m_mpiInfo->size) {
93     // ratios not the same so subdivide side with more elements only
94     if (n0>n1) {
95 caltinay 3944 d0=0;
96 caltinay 3943 d1=1;
97     } else {
98     d0=1;
99 caltinay 3944 d1=0;
100 caltinay 3943 }
101     }
102     } else if (d0<=0 && d2<=0) {
103     warn=true;
104 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
105 caltinay 3943 d2=m_mpiInfo->size/d0;
106     if (d0*d1*d2 != m_mpiInfo->size) {
107     // ratios not the same so subdivide side with more elements only
108     if (n0>n2) {
109 caltinay 3944 d0=0;
110 caltinay 3943 d2=1;
111     } else {
112     d0=1;
113 caltinay 3944 d2=0;
114 caltinay 3943 }
115     }
116     } else if (d1<=0 && d2<=0) {
117     warn=true;
118 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
119 caltinay 3943 d2=m_mpiInfo->size/d1;
120     if (d0*d1*d2 != m_mpiInfo->size) {
121     // ratios not the same so subdivide side with more elements only
122     if (n1>n2) {
123 caltinay 3944 d1=0;
124 caltinay 3943 d2=1;
125     } else {
126     d1=1;
127 caltinay 3944 d2=0;
128 caltinay 3943 }
129     }
130     }
131     if (d0<=0) {
132     // d1,d2 are preset, determine d0
133     d0=m_mpiInfo->size/(d1*d2);
134     } else if (d1<=0) {
135     // d0,d2 are preset, determine d1
136     d1=m_mpiInfo->size/(d0*d2);
137     } else if (d2<=0) {
138     // d0,d1 are preset, determine d2
139     d2=m_mpiInfo->size/(d0*d1);
140     }
141    
142 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
143     // among number of ranks
144 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
145 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
146    
147 caltinay 3943 if (warn) {
148     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
149     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
150     }
151 caltinay 3691
152 caltinay 4334 double l0 = x1-x0;
153     double l1 = y1-y0;
154     double l2 = z1-z0;
155     m_dx[0] = l0/n0;
156     m_dx[1] = l1/n1;
157     m_dx[2] = l2/n2;
158    
159     if ((n0+1)%d0 > 0) {
160 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
161 caltinay 4334 l0=m_dx[0]*n0;
162 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
163 caltinay 4334 << n0 << ", l0=" << l0 << endl;
164 caltinay 3943 }
165 caltinay 4334 if ((n1+1)%d1 > 0) {
166 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
167 caltinay 4334 l1=m_dx[1]*n1;
168 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
169 caltinay 4334 << n1 << ", l1=" << l1 << endl;
170 caltinay 3943 }
171 caltinay 4334 if ((n2+1)%d2 > 0) {
172 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
173 caltinay 4334 l2=m_dx[2]*n2;
174 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
175 caltinay 4334 << n2 << ", l2=" << l2 << endl;
176 caltinay 3943 }
177    
178 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
179 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
180    
181 caltinay 4334 m_gNE[0] = n0;
182     m_gNE[1] = n1;
183     m_gNE[2] = n2;
184     m_origin[0] = x0;
185     m_origin[1] = y0;
186     m_origin[2] = z0;
187     m_length[0] = l0;
188     m_length[1] = l1;
189     m_length[2] = l2;
190     m_NX[0] = d0;
191     m_NX[1] = d1;
192     m_NX[2] = d2;
193    
194 caltinay 3753 // local number of elements (including overlap)
195 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
196     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
197     m_NE[0]++;
198     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
199     m_ownNE[0]--;
200 caltinay 3764
201 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
202     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
203     m_NE[1]++;
204     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
205     m_ownNE[1]--;
206 caltinay 3764
207 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
208     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
209     m_NE[2]++;
210     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
211     m_ownNE[2]--;
212 caltinay 3753
213     // local number of nodes
214 caltinay 4334 m_NN[0] = m_NE[0]+1;
215     m_NN[1] = m_NE[1]+1;
216     m_NN[2] = m_NE[2]+1;
217 caltinay 3753
218 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
219 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
220     if (m_offset[0] > 0)
221     m_offset[0]--;
222     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
223     if (m_offset[1] > 0)
224     m_offset[1]--;
225     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
226     if (m_offset[2] > 0)
227     m_offset[2]--;
228 caltinay 3753
229 caltinay 3691 populateSampleIds();
230 caltinay 3756 createPattern();
231 sshaw 4622
232     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
233 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
234     i != tagnamestonums.end(); i++) {
235     setTagMap(i->first, i->second);
236     }
237 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
238 caltinay 3691 }
239    
240    
241     Brick::~Brick()
242     {
243 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
244     Paso_Connector_free(m_connector);
245 sshaw 4622 delete assembler;
246 caltinay 3691 }
247    
248     string Brick::getDescription() const
249     {
250     return "ripley::Brick";
251     }
252    
253     bool Brick::operator==(const AbstractDomain& other) const
254     {
255 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
256     if (o) {
257     return (RipleyDomain::operator==(other) &&
258 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
259     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
260     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
261     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
262 caltinay 3744 }
263 caltinay 3691
264     return false;
265     }
266    
267 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
268 caltinay 4618 const ReaderParameters& params) const
269 caltinay 4013 {
270     #ifdef USE_NETCDF
271     // check destination function space
272     int myN0, myN1, myN2;
273     if (out.getFunctionSpace().getTypeCode() == Nodes) {
274 caltinay 4334 myN0 = m_NN[0];
275     myN1 = m_NN[1];
276     myN2 = m_NN[2];
277 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
278     out.getFunctionSpace().getTypeCode() == ReducedElements) {
279 caltinay 4334 myN0 = m_NE[0];
280     myN1 = m_NE[1];
281     myN2 = m_NE[2];
282 caltinay 4013 } else
283     throw RipleyException("readNcGrid(): invalid function space for output data object");
284    
285 caltinay 4615 if (params.first.size() != 3)
286 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
287    
288 caltinay 4615 if (params.numValues.size() != 3)
289 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
290    
291 caltinay 4615 if (params.multiplier.size() != 3)
292 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
293 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
294     if (params.multiplier[i]<1)
295 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
296    
297 caltinay 4013 // check file existence and size
298     NcFile f(filename.c_str(), NcFile::ReadOnly);
299     if (!f.is_valid())
300     throw RipleyException("readNcGrid(): cannot open file");
301    
302     NcVar* var = f.get_var(varname.c_str());
303     if (!var)
304     throw RipleyException("readNcGrid(): invalid variable name");
305    
306     // TODO: rank>0 data support
307     const int numComp = out.getDataPointSize();
308     if (numComp > 1)
309     throw RipleyException("readNcGrid(): only scalar data supported");
310    
311     const int dims = var->num_dims();
312 caltinay 4477 boost::scoped_array<long> edges(var->edges());
313 caltinay 4013
314     // is this a slice of the data object (dims!=3)?
315     // note the expected ordering of edges (as in numpy: z,y,x)
316 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
317     params.numValues[1] > edges[1] ||
318     params.numValues[0] > edges[2]))
319     || (dims==2 && params.numValues[2]>1)
320     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
321 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
322     }
323    
324     // check if this rank contributes anything
325 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
326     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
327     params.first[1] >= m_offset[1]+myN1 ||
328     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
329     params.first[2] >= m_offset[2]+myN2 ||
330     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
331 caltinay 4013 return;
332     }
333    
334     // now determine how much this rank has to write
335    
336     // first coordinates in data object to write to
337 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
338     const int first1 = max(0, params.first[1]-m_offset[1]);
339     const int first2 = max(0, params.first[2]-m_offset[2]);
340 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
341     int idx0 = max(0, m_offset[0]-params.first[0]);
342     int idx1 = max(0, m_offset[1]-params.first[1]);
343     int idx2 = max(0, m_offset[2]-params.first[2]);
344 caltinay 4277 // number of values to read
345 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
346     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
347     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
348 caltinay 4013
349 caltinay 4618 // make sure we read the right block if going backwards through file
350     if (params.reverse[0])
351     idx0 = edges[dims-1]-num0-idx0;
352     if (dims>1 && params.reverse[1])
353     idx1 = edges[dims-2]-num1-idx1;
354     if (dims>2 && params.reverse[2])
355     idx2 = edges[dims-3]-num2-idx2;
356    
357    
358 caltinay 4013 vector<double> values(num0*num1*num2);
359     if (dims==3) {
360     var->set_cur(idx2, idx1, idx0);
361     var->get(&values[0], num2, num1, num0);
362     } else if (dims==2) {
363     var->set_cur(idx1, idx0);
364     var->get(&values[0], num1, num0);
365     } else {
366     var->set_cur(idx0);
367     var->get(&values[0], num0);
368     }
369    
370     const int dpp = out.getNumDataPointsPerSample();
371     out.requireWrite();
372    
373 caltinay 4618 // helpers for reversing
374     const int x0 = (params.reverse[0] ? num0-1 : 0);
375     const int x_mult = (params.reverse[0] ? -1 : 1);
376     const int y0 = (params.reverse[1] ? num1-1 : 0);
377     const int y_mult = (params.reverse[1] ? -1 : 1);
378     const int z0 = (params.reverse[2] ? num2-1 : 0);
379     const int z_mult = (params.reverse[2] ? -1 : 1);
380    
381 caltinay 4013 for (index_t z=0; z<num2; z++) {
382     for (index_t y=0; y<num1; y++) {
383     #pragma omp parallel for
384     for (index_t x=0; x<num0; x++) {
385 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
386     +(first1+y*params.multiplier[1])*myN0
387     +(first2+z*params.multiplier[2])*myN0*myN1;
388 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
389     +(y0+y_mult*y)*num0
390     +(x0+x_mult*x);
391 caltinay 4174 if (!isnan(values[srcIndex])) {
392 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
393     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
394     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
395 caltinay 4277 const int dataIndex = baseIndex+m0
396     +m1*myN0
397     +m2*myN0*myN1;
398     double* dest = out.getSampleDataRW(dataIndex);
399     for (index_t q=0; q<dpp; q++) {
400     *dest++ = values[srcIndex];
401     }
402     }
403     }
404 caltinay 4174 }
405 caltinay 4013 }
406     }
407     }
408     }
409     #else
410     throw RipleyException("readNcGrid(): not compiled with netCDF support");
411     #endif
412     }
413    
414 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
415 caltinay 4618 const ReaderParameters& params) const
416 caltinay 4334 {
417 caltinay 4495 // the mapping is not universally correct but should work on our
418     // supported platforms
419 caltinay 4615 switch (params.dataType) {
420 caltinay 4495 case DATATYPE_INT32:
421 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
422 caltinay 4495 break;
423     case DATATYPE_FLOAT32:
424 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
425 caltinay 4495 break;
426     case DATATYPE_FLOAT64:
427 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
428 caltinay 4495 break;
429     default:
430     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
431     }
432     }
433    
434     template<typename ValueType>
435     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
436 caltinay 4618 const ReaderParameters& params) const
437 caltinay 4495 {
438 caltinay 4334 // check destination function space
439     int myN0, myN1, myN2;
440     if (out.getFunctionSpace().getTypeCode() == Nodes) {
441     myN0 = m_NN[0];
442     myN1 = m_NN[1];
443     myN2 = m_NN[2];
444     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
445     out.getFunctionSpace().getTypeCode() == ReducedElements) {
446     myN0 = m_NE[0];
447     myN1 = m_NE[1];
448     myN2 = m_NE[2];
449     } else
450     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
451    
452 caltinay 4615 if (params.first.size() != 3)
453 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
454    
455 caltinay 4615 if (params.numValues.size() != 3)
456 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
457    
458 caltinay 4615 if (params.multiplier.size() != 3)
459 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
460 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
461     if (params.multiplier[i]<1)
462 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
463    
464     // check file existence and size
465     ifstream f(filename.c_str(), ifstream::binary);
466     if (f.fail()) {
467     throw RipleyException("readBinaryGrid(): cannot open file");
468     }
469     f.seekg(0, ios::end);
470     const int numComp = out.getDataPointSize();
471     const int filesize = f.tellg();
472 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
473 caltinay 4334 if (filesize < reqsize) {
474     f.close();
475     throw RipleyException("readBinaryGrid(): not enough data in file");
476     }
477    
478     // check if this rank contributes anything
479 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
480     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
481     params.first[1] >= m_offset[1]+myN1 ||
482     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
483     params.first[2] >= m_offset[2]+myN2 ||
484     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
485 caltinay 4334 f.close();
486     return;
487     }
488    
489     // now determine how much this rank has to write
490    
491     // first coordinates in data object to write to
492 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
493     const int first1 = max(0, params.first[1]-m_offset[1]);
494     const int first2 = max(0, params.first[2]-m_offset[2]);
495 caltinay 4334 // indices to first value in file
496 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
497     const int idx1 = max(0, m_offset[1]-params.first[1]);
498     const int idx2 = max(0, m_offset[2]-params.first[2]);
499 caltinay 4334 // number of values to read
500 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
501     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
502     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
503 caltinay 4334
504     out.requireWrite();
505 caltinay 4495 vector<ValueType> values(num0*numComp);
506 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
507    
508 caltinay 4529 for (int z=0; z<num2; z++) {
509     for (int y=0; y<num1; y++) {
510 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
511     +(idx2+z)*params.numValues[0]*params.numValues[1]);
512 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
513     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
514 caltinay 4334
515 caltinay 4529 for (int x=0; x<num0; x++) {
516 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
517     +(first1+y*params.multiplier[1])*myN0
518     +(first2+z*params.multiplier[2])*myN0*myN1;
519     for (int m2=0; m2<params.multiplier[2]; m2++) {
520     for (int m1=0; m1<params.multiplier[1]; m1++) {
521     for (int m0=0; m0<params.multiplier[0]; m0++) {
522 caltinay 4334 const int dataIndex = baseIndex+m0
523     +m1*myN0
524     +m2*myN0*myN1;
525     double* dest = out.getSampleDataRW(dataIndex);
526 caltinay 4529 for (int c=0; c<numComp; c++) {
527     ValueType val = values[x*numComp+c];
528    
529 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
530 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
531     // this will alter val!!
532     byte_swap32(cval);
533     }
534     if (!std::isnan(val)) {
535     for (int q=0; q<dpp; q++) {
536     *dest++ = static_cast<double>(val);
537 caltinay 4334 }
538     }
539     }
540     }
541     }
542     }
543     }
544     }
545     }
546    
547     f.close();
548     }
549    
550 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
551     int byteOrder, int dataType) const
552 caltinay 4334 {
553 caltinay 4357 // the mapping is not universally correct but should work on our
554     // supported platforms
555     switch (dataType) {
556     case DATATYPE_INT32:
557     writeBinaryGridImpl<int>(in, filename, byteOrder);
558     break;
559     case DATATYPE_FLOAT32:
560     writeBinaryGridImpl<float>(in, filename, byteOrder);
561     break;
562     case DATATYPE_FLOAT64:
563     writeBinaryGridImpl<double>(in, filename, byteOrder);
564     break;
565     default:
566     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
567     }
568     }
569    
570     template<typename ValueType>
571     void Brick::writeBinaryGridImpl(const escript::Data& in,
572     const string& filename, int byteOrder) const
573     {
574 caltinay 4334 // check function space and determine number of points
575     int myN0, myN1, myN2;
576     int totalN0, totalN1, totalN2;
577     if (in.getFunctionSpace().getTypeCode() == Nodes) {
578     myN0 = m_NN[0];
579     myN1 = m_NN[1];
580     myN2 = m_NN[2];
581     totalN0 = m_gNE[0]+1;
582     totalN1 = m_gNE[1]+1;
583     totalN2 = m_gNE[2]+1;
584     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
585     in.getFunctionSpace().getTypeCode() == ReducedElements) {
586     myN0 = m_NE[0];
587     myN1 = m_NE[1];
588     myN2 = m_NE[2];
589     totalN0 = m_gNE[0];
590     totalN1 = m_gNE[1];
591     totalN2 = m_gNE[2];
592     } else
593     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
594    
595     const int numComp = in.getDataPointSize();
596     const int dpp = in.getNumDataPointsPerSample();
597 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
598 caltinay 4334
599     if (numComp > 1 || dpp > 1)
600     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
601    
602     // from here on we know that each sample consists of one value
603 caltinay 4482 FileWriter fw;
604     fw.openFile(filename, fileSize);
605 caltinay 4334 MPIBarrier();
606    
607     for (index_t z=0; z<myN2; z++) {
608     for (index_t y=0; y<myN1; y++) {
609     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
610 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
611 caltinay 4334 ostringstream oss;
612    
613     for (index_t x=0; x<myN0; x++) {
614 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
615 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
616     if (byteOrder == BYTEORDER_NATIVE) {
617 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
618     } else {
619     char* value = reinterpret_cast<char*>(&fvalue);
620 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
621 caltinay 4334 }
622     }
623 caltinay 4482 fw.writeAt(oss, fileofs);
624 caltinay 4334 }
625     }
626 caltinay 4482 fw.close();
627 caltinay 4334 }
628    
629 caltinay 3691 void Brick::dump(const string& fileName) const
630     {
631     #if USE_SILO
632     string fn(fileName);
633     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
634     fn+=".silo";
635     }
636    
637     int driver=DB_HDF5;
638     string siloPath;
639     DBfile* dbfile = NULL;
640    
641     #ifdef ESYS_MPI
642     PMPIO_baton_t* baton = NULL;
643 gross 3793 const int NUM_SILO_FILES = 1;
644     const char* blockDirFmt = "/block%04d";
645 caltinay 3691 #endif
646    
647     if (m_mpiInfo->size > 1) {
648     #ifdef ESYS_MPI
649     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
650     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
651     PMPIO_DefaultClose, (void*)&driver);
652     // try the fallback driver in case of error
653     if (!baton && driver != DB_PDB) {
654     driver = DB_PDB;
655     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
656     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
657     PMPIO_DefaultClose, (void*)&driver);
658     }
659     if (baton) {
660     char str[64];
661     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
662     siloPath = str;
663     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
664     }
665     #endif
666     } else {
667     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
668     getDescription().c_str(), driver);
669     // try the fallback driver in case of error
670     if (!dbfile && driver != DB_PDB) {
671     driver = DB_PDB;
672     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
673     getDescription().c_str(), driver);
674     }
675     }
676    
677     if (!dbfile)
678     throw RipleyException("dump: Could not create Silo file");
679    
680     /*
681     if (driver==DB_HDF5) {
682     // gzip level 1 already provides good compression with minimal
683     // performance penalty. Some tests showed that gzip levels >3 performed
684     // rather badly on escript data both in terms of time and space
685     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
686     }
687     */
688    
689 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
690     boost::scoped_ptr<double> y(new double[m_NN[1]]);
691     boost::scoped_ptr<double> z(new double[m_NN[2]]);
692 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
693     #pragma omp parallel
694     {
695     #pragma omp for
696 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
697     coords[0][i0]=getLocalCoordinate(i0, 0);
698 caltinay 3691 }
699     #pragma omp for
700 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
701     coords[1][i1]=getLocalCoordinate(i1, 1);
702 caltinay 3691 }
703     #pragma omp for
704 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
705     coords[2][i2]=getLocalCoordinate(i2, 2);
706 caltinay 3691 }
707     }
708 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
709    
710     // write mesh
711     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
712 caltinay 3691 DB_COLLINEAR, NULL);
713    
714 caltinay 4334 // write node ids
715     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
716 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
717 caltinay 3691
718 caltinay 3698 // write element ids
719 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
720 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
721 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
722 caltinay 3698
723     // rank 0 writes multimesh and multivar
724 caltinay 3691 if (m_mpiInfo->rank == 0) {
725     vector<string> tempstrings;
726     vector<char*> names;
727     for (dim_t i=0; i<m_mpiInfo->size; i++) {
728     stringstream path;
729     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
730     tempstrings.push_back(path.str());
731     names.push_back((char*)tempstrings.back().c_str());
732     }
733     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
734     DBSetDir(dbfile, "/");
735     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
736     &types[0], NULL);
737     tempstrings.clear();
738     names.clear();
739     for (dim_t i=0; i<m_mpiInfo->size; i++) {
740     stringstream path;
741     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
742     tempstrings.push_back(path.str());
743     names.push_back((char*)tempstrings.back().c_str());
744     }
745     types.assign(m_mpiInfo->size, DB_QUADVAR);
746     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
747     &types[0], NULL);
748 caltinay 3698 tempstrings.clear();
749     names.clear();
750     for (dim_t i=0; i<m_mpiInfo->size; i++) {
751     stringstream path;
752     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
753     tempstrings.push_back(path.str());
754     names.push_back((char*)tempstrings.back().c_str());
755     }
756     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
757     &types[0], NULL);
758 caltinay 3691 }
759    
760     if (m_mpiInfo->size > 1) {
761     #ifdef ESYS_MPI
762     PMPIO_HandOffBaton(baton, dbfile);
763     PMPIO_Finish(baton);
764     #endif
765     } else {
766     DBClose(dbfile);
767     }
768    
769     #else // USE_SILO
770 caltinay 3791 throw RipleyException("dump: no Silo support");
771 caltinay 3691 #endif
772     }
773    
774     const int* Brick::borrowSampleReferenceIDs(int fsType) const
775     {
776 caltinay 3697 switch (fsType) {
777     case Nodes:
778 caltinay 3748 case ReducedNodes: //FIXME: reduced
779 caltinay 3753 return &m_nodeId[0];
780 caltinay 3757 case DegreesOfFreedom:
781     case ReducedDegreesOfFreedom: //FIXME: reduced
782 caltinay 3753 return &m_dofId[0];
783 caltinay 3697 case Elements:
784 caltinay 3733 case ReducedElements:
785 caltinay 3697 return &m_elementId[0];
786 caltinay 3757 case FaceElements:
787 caltinay 3733 case ReducedFaceElements:
788 caltinay 3697 return &m_faceId[0];
789     default:
790     break;
791     }
792 caltinay 3691
793 caltinay 3697 stringstream msg;
794 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
795 caltinay 3697 throw RipleyException(msg.str());
796 caltinay 3691 }
797    
798 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
799 caltinay 3691 {
800 caltinay 3759 if (getMPISize()==1)
801     return true;
802    
803 caltinay 3757 switch (fsType) {
804     case Nodes:
805     case ReducedNodes: //FIXME: reduced
806     return (m_dofMap[id] < getNumDOF());
807     case DegreesOfFreedom:
808     case ReducedDegreesOfFreedom:
809     return true;
810     case Elements:
811     case ReducedElements:
812     {
813     // check ownership of element's _last_ node
814 caltinay 4334 const index_t x=id%m_NE[0] + 1;
815     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
816     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
817     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
818 caltinay 3757 }
819     case FaceElements:
820     case ReducedFaceElements:
821 caltinay 3759 {
822     // check ownership of face element's last node
823     dim_t n=0;
824 caltinay 4334 for (size_t i=0; i<6; i++) {
825     n+=m_faceCount[i];
826 caltinay 3759 if (id<n) {
827 caltinay 4334 const index_t j=id-n+m_faceCount[i];
828 caltinay 3759 if (i>=4) { // front or back
829 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
830     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
831 caltinay 3759 } else if (i>=2) { // bottom or top
832 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
833     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
834 caltinay 3759 } else { // left or right
835 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
836     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
837 caltinay 3759 }
838     }
839     }
840     return false;
841     }
842 caltinay 3757 default:
843     break;
844 caltinay 3753 }
845 caltinay 3757
846     stringstream msg;
847 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
848 caltinay 3757 throw RipleyException(msg.str());
849 caltinay 3691 }
850    
851 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
852 caltinay 3703 {
853 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
854     out.requireWrite();
855     #pragma omp parallel
856     {
857     if (m_faceOffset[0] > -1) {
858     #pragma omp for nowait
859 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
860     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
861     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
862 caltinay 3764 // set vector at four quadrature points
863     *o++ = -1.; *o++ = 0.; *o++ = 0.;
864     *o++ = -1.; *o++ = 0.; *o++ = 0.;
865     *o++ = -1.; *o++ = 0.; *o++ = 0.;
866     *o++ = -1.; *o++ = 0.; *o = 0.;
867     }
868     }
869     }
870    
871     if (m_faceOffset[1] > -1) {
872     #pragma omp for nowait
873 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
874     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
875     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
876 caltinay 3764 // set vector at four quadrature points
877     *o++ = 1.; *o++ = 0.; *o++ = 0.;
878     *o++ = 1.; *o++ = 0.; *o++ = 0.;
879     *o++ = 1.; *o++ = 0.; *o++ = 0.;
880     *o++ = 1.; *o++ = 0.; *o = 0.;
881     }
882     }
883     }
884    
885     if (m_faceOffset[2] > -1) {
886     #pragma omp for nowait
887 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
888     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
889     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
890 caltinay 3764 // set vector at four quadrature points
891     *o++ = 0.; *o++ = -1.; *o++ = 0.;
892     *o++ = 0.; *o++ = -1.; *o++ = 0.;
893     *o++ = 0.; *o++ = -1.; *o++ = 0.;
894     *o++ = 0.; *o++ = -1.; *o = 0.;
895     }
896     }
897     }
898    
899     if (m_faceOffset[3] > -1) {
900     #pragma omp for nowait
901 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
902     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
903     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
904 caltinay 3764 // set vector at four quadrature points
905     *o++ = 0.; *o++ = 1.; *o++ = 0.;
906     *o++ = 0.; *o++ = 1.; *o++ = 0.;
907     *o++ = 0.; *o++ = 1.; *o++ = 0.;
908     *o++ = 0.; *o++ = 1.; *o = 0.;
909     }
910     }
911     }
912    
913     if (m_faceOffset[4] > -1) {
914     #pragma omp for nowait
915 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
916     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
917     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
918 caltinay 3764 // set vector at four quadrature points
919     *o++ = 0.; *o++ = 0.; *o++ = -1.;
920     *o++ = 0.; *o++ = 0.; *o++ = -1.;
921     *o++ = 0.; *o++ = 0.; *o++ = -1.;
922     *o++ = 0.; *o++ = 0.; *o = -1.;
923     }
924     }
925     }
926    
927     if (m_faceOffset[5] > -1) {
928     #pragma omp for nowait
929 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
930     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
931     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
932 caltinay 3764 // set vector at four quadrature points
933     *o++ = 0.; *o++ = 0.; *o++ = 1.;
934     *o++ = 0.; *o++ = 0.; *o++ = 1.;
935     *o++ = 0.; *o++ = 0.; *o++ = 1.;
936     *o++ = 0.; *o++ = 0.; *o = 1.;
937     }
938     }
939     }
940     } // end of parallel section
941     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
942     out.requireWrite();
943     #pragma omp parallel
944     {
945     if (m_faceOffset[0] > -1) {
946     #pragma omp for nowait
947 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
948     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
949     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
950 caltinay 3764 *o++ = -1.;
951     *o++ = 0.;
952     *o = 0.;
953     }
954     }
955     }
956    
957     if (m_faceOffset[1] > -1) {
958     #pragma omp for nowait
959 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
960     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
961     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
962 caltinay 3764 *o++ = 1.;
963     *o++ = 0.;
964     *o = 0.;
965     }
966     }
967     }
968    
969     if (m_faceOffset[2] > -1) {
970     #pragma omp for nowait
971 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
972     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
973     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
974 caltinay 3764 *o++ = 0.;
975     *o++ = -1.;
976     *o = 0.;
977     }
978     }
979     }
980    
981     if (m_faceOffset[3] > -1) {
982     #pragma omp for nowait
983 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
984     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
985     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
986 caltinay 3764 *o++ = 0.;
987     *o++ = 1.;
988     *o = 0.;
989     }
990     }
991     }
992    
993     if (m_faceOffset[4] > -1) {
994     #pragma omp for nowait
995 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
996     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
997     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
998 caltinay 3764 *o++ = 0.;
999     *o++ = 0.;
1000     *o = -1.;
1001     }
1002     }
1003     }
1004    
1005     if (m_faceOffset[5] > -1) {
1006     #pragma omp for nowait
1007 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1008     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1009     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1010 caltinay 3764 *o++ = 0.;
1011     *o++ = 0.;
1012     *o = 1.;
1013     }
1014     }
1015     }
1016     } // end of parallel section
1017    
1018     } else {
1019     stringstream msg;
1020 caltinay 3791 msg << "setToNormal: invalid function space type "
1021     << out.getFunctionSpace().getTypeCode();
1022 caltinay 3764 throw RipleyException(msg.str());
1023     }
1024     }
1025    
1026     void Brick::setToSize(escript::Data& out) const
1027     {
1028     if (out.getFunctionSpace().getTypeCode() == Elements
1029     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1030     out.requireWrite();
1031     const dim_t numQuad=out.getNumDataPointsPerSample();
1032 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1033 caltinay 3764 #pragma omp parallel for
1034     for (index_t k = 0; k < getNumElements(); ++k) {
1035     double* o = out.getSampleDataRW(k);
1036     fill(o, o+numQuad, size);
1037     }
1038     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1039     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1040     out.requireWrite();
1041     const dim_t numQuad=out.getNumDataPointsPerSample();
1042     #pragma omp parallel
1043     {
1044     if (m_faceOffset[0] > -1) {
1045 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1046 caltinay 3764 #pragma omp for nowait
1047 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1048     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1049     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1050 caltinay 3764 fill(o, o+numQuad, size);
1051     }
1052     }
1053     }
1054    
1055     if (m_faceOffset[1] > -1) {
1056 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1057 caltinay 3764 #pragma omp for nowait
1058 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1059     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1060     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1061 caltinay 3764 fill(o, o+numQuad, size);
1062     }
1063     }
1064     }
1065    
1066     if (m_faceOffset[2] > -1) {
1067 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1068 caltinay 3764 #pragma omp for nowait
1069 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1070     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1071     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1072 caltinay 3764 fill(o, o+numQuad, size);
1073     }
1074     }
1075     }
1076    
1077     if (m_faceOffset[3] > -1) {
1078 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1079 caltinay 3764 #pragma omp for nowait
1080 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1081     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1082     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1083 caltinay 3764 fill(o, o+numQuad, size);
1084     }
1085     }
1086     }
1087    
1088     if (m_faceOffset[4] > -1) {
1089 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1090 caltinay 3764 #pragma omp for nowait
1091 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1092     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1093     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1094 caltinay 3764 fill(o, o+numQuad, size);
1095     }
1096     }
1097     }
1098    
1099     if (m_faceOffset[5] > -1) {
1100 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1101 caltinay 3764 #pragma omp for nowait
1102 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1103     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1104     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1105 caltinay 3764 fill(o, o+numQuad, size);
1106     }
1107     }
1108     }
1109     } // end of parallel section
1110    
1111     } else {
1112     stringstream msg;
1113 caltinay 3791 msg << "setToSize: invalid function space type "
1114     << out.getFunctionSpace().getTypeCode();
1115 caltinay 3764 throw RipleyException(msg.str());
1116     }
1117     }
1118    
1119     void Brick::Print_Mesh_Info(const bool full) const
1120     {
1121     RipleyDomain::Print_Mesh_Info(full);
1122     if (full) {
1123     cout << " Id Coordinates" << endl;
1124     cout.precision(15);
1125     cout.setf(ios::scientific, ios::floatfield);
1126     for (index_t i=0; i < getNumNodes(); i++) {
1127     cout << " " << setw(5) << m_nodeId[i]
1128 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1129     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1130     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1131 caltinay 3764 }
1132     }
1133     }
1134    
1135    
1136     //protected
1137     void Brick::assembleCoordinates(escript::Data& arg) const
1138     {
1139     escriptDataC x = arg.getDataC();
1140     int numDim = m_numDim;
1141     if (!isDataPointShapeEqual(&x, 1, &numDim))
1142     throw RipleyException("setToX: Invalid Data object shape");
1143     if (!numSamplesEqual(&x, 1, getNumNodes()))
1144     throw RipleyException("setToX: Illegal number of samples in Data object");
1145    
1146     arg.requireWrite();
1147     #pragma omp parallel for
1148 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1149     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1150     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1151     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1152     point[0] = getLocalCoordinate(i0, 0);
1153     point[1] = getLocalCoordinate(i1, 1);
1154     point[2] = getLocalCoordinate(i2, 2);
1155 caltinay 3764 }
1156     }
1157     }
1158     }
1159    
1160     //protected
1161 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1162 caltinay 3764 {
1163 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1164 caltinay 3731 const double C0 = .044658198738520451079;
1165     const double C1 = .16666666666666666667;
1166     const double C2 = .21132486540518711775;
1167     const double C3 = .25;
1168     const double C4 = .5;
1169     const double C5 = .62200846792814621559;
1170     const double C6 = .78867513459481288225;
1171    
1172 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1173 caltinay 3760 out.requireWrite();
1174 caltinay 3913 #pragma omp parallel
1175     {
1176     vector<double> f_000(numComp);
1177     vector<double> f_001(numComp);
1178     vector<double> f_010(numComp);
1179     vector<double> f_011(numComp);
1180     vector<double> f_100(numComp);
1181     vector<double> f_101(numComp);
1182     vector<double> f_110(numComp);
1183     vector<double> f_111(numComp);
1184     #pragma omp for
1185 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1186     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1187     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1188     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1189     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1190     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1191     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1197 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1198 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1199     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1200     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1201     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1202     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1203     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1204     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1205     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1206     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1207     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1208     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1209     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1210 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1211     o[INDEX3(i,1,0,numComp,3)] = V4;
1212     o[INDEX3(i,2,0,numComp,3)] = V8;
1213     o[INDEX3(i,0,1,numComp,3)] = V0;
1214     o[INDEX3(i,1,1,numComp,3)] = V5;
1215     o[INDEX3(i,2,1,numComp,3)] = V9;
1216     o[INDEX3(i,0,2,numComp,3)] = V1;
1217     o[INDEX3(i,1,2,numComp,3)] = V4;
1218     o[INDEX3(i,2,2,numComp,3)] = V10;
1219     o[INDEX3(i,0,3,numComp,3)] = V1;
1220     o[INDEX3(i,1,3,numComp,3)] = V5;
1221     o[INDEX3(i,2,3,numComp,3)] = V11;
1222     o[INDEX3(i,0,4,numComp,3)] = V2;
1223     o[INDEX3(i,1,4,numComp,3)] = V6;
1224     o[INDEX3(i,2,4,numComp,3)] = V8;
1225     o[INDEX3(i,0,5,numComp,3)] = V2;
1226     o[INDEX3(i,1,5,numComp,3)] = V7;
1227     o[INDEX3(i,2,5,numComp,3)] = V9;
1228     o[INDEX3(i,0,6,numComp,3)] = V3;
1229     o[INDEX3(i,1,6,numComp,3)] = V6;
1230     o[INDEX3(i,2,6,numComp,3)] = V10;
1231     o[INDEX3(i,0,7,numComp,3)] = V3;
1232     o[INDEX3(i,1,7,numComp,3)] = V7;
1233     o[INDEX3(i,2,7,numComp,3)] = V11;
1234     } // end of component loop i
1235     } // end of k0 loop
1236     } // end of k1 loop
1237     } // end of k2 loop
1238     } // end of parallel section
1239 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1240 caltinay 3760 out.requireWrite();
1241 caltinay 3913 #pragma omp parallel
1242     {
1243     vector<double> f_000(numComp);
1244     vector<double> f_001(numComp);
1245     vector<double> f_010(numComp);
1246     vector<double> f_011(numComp);
1247     vector<double> f_100(numComp);
1248     vector<double> f_101(numComp);
1249     vector<double> f_110(numComp);
1250     vector<double> f_111(numComp);
1251     #pragma omp for
1252 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1253     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1254     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1255     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1256     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1257     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1258     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1264 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1265 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1266     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1267     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1268 caltinay 3913 } // end of component loop i
1269     } // end of k0 loop
1270     } // end of k1 loop
1271     } // end of k2 loop
1272     } // end of parallel section
1273 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1274 caltinay 3760 out.requireWrite();
1275 caltinay 3722 #pragma omp parallel
1276     {
1277 caltinay 3913 vector<double> f_000(numComp);
1278     vector<double> f_001(numComp);
1279     vector<double> f_010(numComp);
1280     vector<double> f_011(numComp);
1281     vector<double> f_100(numComp);
1282     vector<double> f_101(numComp);
1283     vector<double> f_110(numComp);
1284     vector<double> f_111(numComp);
1285 caltinay 3722 if (m_faceOffset[0] > -1) {
1286     #pragma omp for nowait
1287 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1288     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1289     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1290     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1291     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1292     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1298 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1299 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1300     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1301     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1302     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1303     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1304 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1305     o[INDEX3(i,2,0,numComp,3)] = V2;
1306 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1307 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1308     o[INDEX3(i,2,1,numComp,3)] = V3;
1309 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1310 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1311     o[INDEX3(i,2,2,numComp,3)] = V2;
1312 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1313 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1314     o[INDEX3(i,2,3,numComp,3)] = V3;
1315 caltinay 3764 } // end of component loop i
1316     } // end of k1 loop
1317     } // end of k2 loop
1318     } // end of face 0
1319 caltinay 3722 if (m_faceOffset[1] > -1) {
1320     #pragma omp for nowait
1321 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1322     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1323     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1324     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1325     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1326     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1332 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1333 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1334     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1335     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1336     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1337     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1338 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1339     o[INDEX3(i,2,0,numComp,3)] = V2;
1340 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1341 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1342     o[INDEX3(i,2,1,numComp,3)] = V3;
1343 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1344 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1345     o[INDEX3(i,2,2,numComp,3)] = V2;
1346 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1347 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1348     o[INDEX3(i,2,3,numComp,3)] = V3;
1349 caltinay 3764 } // end of component loop i
1350     } // end of k1 loop
1351     } // end of k2 loop
1352     } // end of face 1
1353 caltinay 3722 if (m_faceOffset[2] > -1) {
1354     #pragma omp for nowait
1355 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1356     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1357     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1358     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1359     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1360     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1366 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1367 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1368     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1369     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1370 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1371 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1372 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1373     o[INDEX3(i,0,1,numComp,3)] = V0;
1374 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1375 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1376     o[INDEX3(i,0,2,numComp,3)] = V0;
1377 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1378 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1379     o[INDEX3(i,0,3,numComp,3)] = V0;
1380 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1381 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1382 caltinay 3764 } // end of component loop i
1383     } // end of k0 loop
1384     } // end of k2 loop
1385     } // end of face 2
1386 caltinay 3722 if (m_faceOffset[3] > -1) {
1387     #pragma omp for nowait
1388 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1389     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1390     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1391     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1392     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1393     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1399 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1400 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1401     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1402     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1403     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1404 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1405 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1406 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1407     o[INDEX3(i,0,1,numComp,3)] = V0;
1408 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1409 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1410     o[INDEX3(i,0,2,numComp,3)] = V1;
1411 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1412 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1413     o[INDEX3(i,0,3,numComp,3)] = V1;
1414 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1415 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1416 caltinay 3764 } // end of component loop i
1417     } // end of k0 loop
1418     } // end of k2 loop
1419     } // end of face 3
1420 caltinay 3722 if (m_faceOffset[4] > -1) {
1421     #pragma omp for nowait
1422 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1423     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1424     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1425     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1426     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1427     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1433 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1434 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1435     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1436     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1437     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1438 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1439     o[INDEX3(i,1,0,numComp,3)] = V2;
1440 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1441 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1442     o[INDEX3(i,1,1,numComp,3)] = V3;
1443 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1444 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1445     o[INDEX3(i,1,2,numComp,3)] = V2;
1446 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1447 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1448     o[INDEX3(i,1,3,numComp,3)] = V3;
1449 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1450 caltinay 3764 } // end of component loop i
1451     } // end of k0 loop
1452     } // end of k1 loop
1453     } // end of face 4
1454 caltinay 3722 if (m_faceOffset[5] > -1) {
1455     #pragma omp for nowait
1456 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1457     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1458     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1459     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1460     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1461     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1467 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1468 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1469     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1470     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1471     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1472 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1473     o[INDEX3(i,1,0,numComp,3)] = V2;
1474 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1475 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1476     o[INDEX3(i,1,1,numComp,3)] = V3;
1477 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1478 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1479     o[INDEX3(i,1,2,numComp,3)] = V2;
1480 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1481 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1482     o[INDEX3(i,1,3,numComp,3)] = V3;
1483 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1484 caltinay 3764 } // end of component loop i
1485     } // end of k0 loop
1486     } // end of k1 loop
1487     } // end of face 5
1488 caltinay 3722 } // end of parallel section
1489 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1490 caltinay 3760 out.requireWrite();
1491 caltinay 3722 #pragma omp parallel
1492     {
1493 caltinay 3913 vector<double> f_000(numComp);
1494     vector<double> f_001(numComp);
1495     vector<double> f_010(numComp);
1496     vector<double> f_011(numComp);
1497     vector<double> f_100(numComp);
1498     vector<double> f_101(numComp);
1499     vector<double> f_110(numComp);
1500     vector<double> f_111(numComp);
1501 caltinay 3722 if (m_faceOffset[0] > -1) {
1502     #pragma omp for nowait
1503 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1504     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1505     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1506     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1507     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1508     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1514 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1515 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1516     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1517     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1518 caltinay 3764 } // end of component loop i
1519     } // end of k1 loop
1520     } // end of k2 loop
1521     } // end of face 0
1522 caltinay 3722 if (m_faceOffset[1] > -1) {
1523     #pragma omp for nowait
1524 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1525     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1526     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1527     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1528     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1529     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1535 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1536 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1537     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx[1];
1538     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / m_dx[2];
1539 caltinay 3764 } // end of component loop i
1540     } // end of k1 loop
1541     } // end of k2 loop
1542     } // end of face 1
1543 caltinay 3722 if (m_faceOffset[2] > -1) {
1544     #pragma omp for nowait
1545 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1546     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1547     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1548     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1549     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0