/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4687 - (hide annotations)
Wed Feb 19 00:03:29 2014 UTC (5 years, 2 months ago) by jfenwick
Original Path: trunk/ripley/src/Brick.cpp
File size: 159178 byte(s)
Remove randomFill python method from ripley domains.
All random data objects (for all domain types) should be generated 
using esys.escript.RandomData()

The only filtered random we have is gaussian on ripley but
it is triggered by passing the tuple as the last arg of RandomData().

While the interface is a bit more complicated (in that you always need
 to pass in shape and functionspace) it does mean we have a 
common interface for all domains. 

Removed randomFill from DataExpanded.
The reasoning behind this is to force domains to call the util function
themselves and enforce whatever consistancy requirements they have.

Added version of blocktools to deal with 2D case in Ripley.
Use blocktools for the 2D transfers [This was cleaner than modifying the
previous implementation to deal with variable shaped points].

Note that under MPI, ripley can not generate random data (even unfiltered)
if any of its per rank dimensions is <4 elements on any side.

Unit tests for these calls are in but some extra checks still needed.



1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 caltinay 3791 #include <paso/SystemMatrix.h>
19 caltinay 4334 #include <esysUtils/esysFileWriter.h>
20 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
21 sshaw 4629 #include <ripley/WaveAssembler3D.h>
22 caltinay 4477 #include <boost/scoped_array.hpp>
23    
24 caltinay 4013 #ifdef USE_NETCDF
25     #include <netcdfcpp.h>
26     #endif
27    
28 caltinay 3691 #if USE_SILO
29     #include <silo.h>
30     #ifdef ESYS_MPI
31     #include <pmpio.h>
32     #endif
33     #endif
34    
35     #include <iomanip>
36    
37 jfenwick 4650 #include "esysUtils/EsysRandom.h"
38     #include "blocktools.h"
39    
40    
41 caltinay 3691 using namespace std;
42 caltinay 4334 using esysUtils::FileWriter;
43 caltinay 3691
44     namespace ripley {
45    
46 caltinay 3781 Brick::Brick(int n0, int n1, int n2, double x0, double y0, double z0,
47 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
48     const std::vector<double>& points, const std::vector<int>& tags,
49 jfenwick 4638 const simap_t& tagnamestonums) :
50 caltinay 4334 RipleyDomain(3)
51 caltinay 3691 {
52 caltinay 3943 // ignore subdivision parameters for serial run
53     if (m_mpiInfo->size == 1) {
54     d0=1;
55     d1=1;
56     d2=1;
57     }
58     bool warn=false;
59     // if number of subdivisions is non-positive, try to subdivide by the same
60     // ratio as the number of elements
61     if (d0<=0 && d1<=0 && d2<=0) {
62     warn=true;
63 jfenwick 3962 d0=(int)(pow(m_mpiInfo->size*(n0+1)*(n0+1)/((float)(n1+1)*(n2+1)), 1.f/3));
64 caltinay 4010 d0=max(1, d0);
65     d1=max(1, (int)(d0*n1/(float)n0));
66 caltinay 3943 d2=m_mpiInfo->size/(d0*d1);
67     if (d0*d1*d2 != m_mpiInfo->size) {
68     // ratios not the same so leave "smallest" side undivided and try
69     // dividing 2 sides only
70     if (n0>=n1) {
71     if (n1>=n2) {
72     d0=d1=0;
73     d2=1;
74     } else {
75     d0=d2=0;
76     d1=1;
77     }
78     } else {
79     if (n0>=n2) {
80     d0=d1=0;
81     d2=1;
82     } else {
83     d0=1;
84     d1=d2=0;
85     }
86     }
87     }
88     }
89     if (d0<=0 && d1<=0) {
90     warn=true;
91 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
92 caltinay 3943 d1=m_mpiInfo->size/d0;
93     if (d0*d1*d2 != m_mpiInfo->size) {
94     // ratios not the same so subdivide side with more elements only
95     if (n0>n1) {
96 caltinay 3944 d0=0;
97 caltinay 3943 d1=1;
98     } else {
99     d0=1;
100 caltinay 3944 d1=0;
101 caltinay 3943 }
102     }
103     } else if (d0<=0 && d2<=0) {
104     warn=true;
105 caltinay 4010 d0=max(1, int(sqrt(m_mpiInfo->size*(n0+1)/(float)(n2+1))));
106 caltinay 3943 d2=m_mpiInfo->size/d0;
107     if (d0*d1*d2 != m_mpiInfo->size) {
108     // ratios not the same so subdivide side with more elements only
109     if (n0>n2) {
110 caltinay 3944 d0=0;
111 caltinay 3943 d2=1;
112     } else {
113     d0=1;
114 caltinay 3944 d2=0;
115 caltinay 3943 }
116     }
117     } else if (d1<=0 && d2<=0) {
118     warn=true;
119 caltinay 4010 d1=max(1, int(sqrt(m_mpiInfo->size*(n1+1)/(float)(n2+1))));
120 caltinay 3943 d2=m_mpiInfo->size/d1;
121     if (d0*d1*d2 != m_mpiInfo->size) {
122     // ratios not the same so subdivide side with more elements only
123     if (n1>n2) {
124 caltinay 3944 d1=0;
125 caltinay 3943 d2=1;
126     } else {
127     d1=1;
128 caltinay 3944 d2=0;
129 caltinay 3943 }
130     }
131     }
132     if (d0<=0) {
133     // d1,d2 are preset, determine d0
134     d0=m_mpiInfo->size/(d1*d2);
135     } else if (d1<=0) {
136     // d0,d2 are preset, determine d1
137     d1=m_mpiInfo->size/(d0*d2);
138     } else if (d2<=0) {
139     // d0,d1 are preset, determine d2
140     d2=m_mpiInfo->size/(d0*d1);
141     }
142    
143 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
144     // among number of ranks
145 caltinay 4334 if (d0*d1*d2 != m_mpiInfo->size)
146 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
147    
148 caltinay 3943 if (warn) {
149     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
150     << d1 << ", d2=" << d2 << "). This may not be optimal!" << endl;
151     }
152 caltinay 3691
153 caltinay 4334 double l0 = x1-x0;
154     double l1 = y1-y0;
155     double l2 = z1-z0;
156     m_dx[0] = l0/n0;
157     m_dx[1] = l1/n1;
158     m_dx[2] = l2/n2;
159    
160     if ((n0+1)%d0 > 0) {
161 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
162 caltinay 4334 l0=m_dx[0]*n0;
163 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
164 caltinay 4334 << n0 << ", l0=" << l0 << endl;
165 caltinay 3943 }
166 caltinay 4334 if ((n1+1)%d1 > 0) {
167 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
168 caltinay 4334 l1=m_dx[1]*n1;
169 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
170 caltinay 4334 << n1 << ", l1=" << l1 << endl;
171 caltinay 3943 }
172 caltinay 4334 if ((n2+1)%d2 > 0) {
173 caltinay 3943 n2=(int)round((float)(n2+1)/d2+0.5)*d2-1;
174 caltinay 4334 l2=m_dx[2]*n2;
175 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N2="
176 caltinay 4334 << n2 << ", l2=" << l2 << endl;
177 caltinay 3943 }
178    
179 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
180 caltinay 3753 throw RipleyException("Too few elements for the number of ranks");
181    
182 caltinay 4334 m_gNE[0] = n0;
183     m_gNE[1] = n1;
184     m_gNE[2] = n2;
185     m_origin[0] = x0;
186     m_origin[1] = y0;
187     m_origin[2] = z0;
188     m_length[0] = l0;
189     m_length[1] = l1;
190     m_length[2] = l2;
191     m_NX[0] = d0;
192     m_NX[1] = d1;
193     m_NX[2] = d2;
194    
195 caltinay 3753 // local number of elements (including overlap)
196 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
197     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
198     m_NE[0]++;
199     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
200     m_ownNE[0]--;
201 caltinay 3764
202 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
203     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
204     m_NE[1]++;
205     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
206     m_ownNE[1]--;
207 caltinay 3764
208 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
209     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
210     m_NE[2]++;
211     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
212     m_ownNE[2]--;
213 caltinay 3753
214     // local number of nodes
215 caltinay 4334 m_NN[0] = m_NE[0]+1;
216     m_NN[1] = m_NE[1]+1;
217     m_NN[2] = m_NE[2]+1;
218 caltinay 3753
219 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
220 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
221     if (m_offset[0] > 0)
222     m_offset[0]--;
223     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
224     if (m_offset[1] > 0)
225     m_offset[1]--;
226     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
227     if (m_offset[2] > 0)
228     m_offset[2]--;
229 caltinay 3753
230 caltinay 3691 populateSampleIds();
231 caltinay 3756 createPattern();
232 sshaw 4622
233     assembler = new DefaultAssembler3D(this, m_dx, m_NX, m_NE, m_NN);
234 sshaw 4629 for (map<string, int>::const_iterator i = tagnamestonums.begin();
235     i != tagnamestonums.end(); i++) {
236     setTagMap(i->first, i->second);
237     }
238 sshaw 4622 addPoints(tags.size(), &points[0], &tags[0]);
239 caltinay 3691 }
240    
241    
242     Brick::~Brick()
243     {
244 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
245     Paso_Connector_free(m_connector);
246 sshaw 4622 delete assembler;
247 caltinay 3691 }
248    
249     string Brick::getDescription() const
250     {
251     return "ripley::Brick";
252     }
253    
254     bool Brick::operator==(const AbstractDomain& other) const
255     {
256 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
257     if (o) {
258     return (RipleyDomain::operator==(other) &&
259 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
260     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
261     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
262     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
263 caltinay 3744 }
264 caltinay 3691
265     return false;
266     }
267    
268 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
269 caltinay 4618 const ReaderParameters& params) const
270 caltinay 4013 {
271     #ifdef USE_NETCDF
272     // check destination function space
273     int myN0, myN1, myN2;
274     if (out.getFunctionSpace().getTypeCode() == Nodes) {
275 caltinay 4334 myN0 = m_NN[0];
276     myN1 = m_NN[1];
277     myN2 = m_NN[2];
278 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
279     out.getFunctionSpace().getTypeCode() == ReducedElements) {
280 caltinay 4334 myN0 = m_NE[0];
281     myN1 = m_NE[1];
282     myN2 = m_NE[2];
283 caltinay 4013 } else
284     throw RipleyException("readNcGrid(): invalid function space for output data object");
285    
286 caltinay 4615 if (params.first.size() != 3)
287 caltinay 4013 throw RipleyException("readNcGrid(): argument 'first' must have 3 entries");
288    
289 caltinay 4615 if (params.numValues.size() != 3)
290 caltinay 4013 throw RipleyException("readNcGrid(): argument 'numValues' must have 3 entries");
291    
292 caltinay 4615 if (params.multiplier.size() != 3)
293 caltinay 4277 throw RipleyException("readNcGrid(): argument 'multiplier' must have 3 entries");
294 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
295     if (params.multiplier[i]<1)
296 caltinay 4277 throw RipleyException("readNcGrid(): all multipliers must be positive");
297    
298 caltinay 4013 // check file existence and size
299     NcFile f(filename.c_str(), NcFile::ReadOnly);
300     if (!f.is_valid())
301     throw RipleyException("readNcGrid(): cannot open file");
302    
303     NcVar* var = f.get_var(varname.c_str());
304     if (!var)
305     throw RipleyException("readNcGrid(): invalid variable name");
306    
307     // TODO: rank>0 data support
308     const int numComp = out.getDataPointSize();
309     if (numComp > 1)
310     throw RipleyException("readNcGrid(): only scalar data supported");
311    
312     const int dims = var->num_dims();
313 caltinay 4477 boost::scoped_array<long> edges(var->edges());
314 caltinay 4013
315     // is this a slice of the data object (dims!=3)?
316     // note the expected ordering of edges (as in numpy: z,y,x)
317 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
318     params.numValues[1] > edges[1] ||
319     params.numValues[0] > edges[2]))
320     || (dims==2 && params.numValues[2]>1)
321     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
322 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
323     }
324    
325     // check if this rank contributes anything
326 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
327     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
328     params.first[1] >= m_offset[1]+myN1 ||
329     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
330     params.first[2] >= m_offset[2]+myN2 ||
331     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
332 caltinay 4013 return;
333     }
334    
335     // now determine how much this rank has to write
336    
337     // first coordinates in data object to write to
338 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
339     const int first1 = max(0, params.first[1]-m_offset[1]);
340     const int first2 = max(0, params.first[2]-m_offset[2]);
341 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
342     int idx0 = max(0, m_offset[0]-params.first[0]);
343     int idx1 = max(0, m_offset[1]-params.first[1]);
344     int idx2 = max(0, m_offset[2]-params.first[2]);
345 caltinay 4277 // number of values to read
346 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
347     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
348     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
349 caltinay 4013
350 caltinay 4618 // make sure we read the right block if going backwards through file
351     if (params.reverse[0])
352     idx0 = edges[dims-1]-num0-idx0;
353     if (dims>1 && params.reverse[1])
354     idx1 = edges[dims-2]-num1-idx1;
355     if (dims>2 && params.reverse[2])
356     idx2 = edges[dims-3]-num2-idx2;
357    
358    
359 caltinay 4013 vector<double> values(num0*num1*num2);
360     if (dims==3) {
361     var->set_cur(idx2, idx1, idx0);
362     var->get(&values[0], num2, num1, num0);
363     } else if (dims==2) {
364     var->set_cur(idx1, idx0);
365     var->get(&values[0], num1, num0);
366     } else {
367     var->set_cur(idx0);
368     var->get(&values[0], num0);
369     }
370    
371     const int dpp = out.getNumDataPointsPerSample();
372     out.requireWrite();
373    
374 caltinay 4618 // helpers for reversing
375     const int x0 = (params.reverse[0] ? num0-1 : 0);
376     const int x_mult = (params.reverse[0] ? -1 : 1);
377     const int y0 = (params.reverse[1] ? num1-1 : 0);
378     const int y_mult = (params.reverse[1] ? -1 : 1);
379     const int z0 = (params.reverse[2] ? num2-1 : 0);
380     const int z_mult = (params.reverse[2] ? -1 : 1);
381    
382 caltinay 4013 for (index_t z=0; z<num2; z++) {
383     for (index_t y=0; y<num1; y++) {
384     #pragma omp parallel for
385     for (index_t x=0; x<num0; x++) {
386 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
387     +(first1+y*params.multiplier[1])*myN0
388     +(first2+z*params.multiplier[2])*myN0*myN1;
389 caltinay 4618 const int srcIndex=(z0+z_mult*z)*num1*num0
390     +(y0+y_mult*y)*num0
391     +(x0+x_mult*x);
392 caltinay 4174 if (!isnan(values[srcIndex])) {
393 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
394     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
395     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
396 caltinay 4277 const int dataIndex = baseIndex+m0
397     +m1*myN0
398     +m2*myN0*myN1;
399     double* dest = out.getSampleDataRW(dataIndex);
400     for (index_t q=0; q<dpp; q++) {
401     *dest++ = values[srcIndex];
402     }
403     }
404     }
405 caltinay 4174 }
406 caltinay 4013 }
407     }
408     }
409     }
410     #else
411     throw RipleyException("readNcGrid(): not compiled with netCDF support");
412     #endif
413     }
414    
415 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
416 caltinay 4618 const ReaderParameters& params) const
417 caltinay 4334 {
418 caltinay 4495 // the mapping is not universally correct but should work on our
419     // supported platforms
420 caltinay 4615 switch (params.dataType) {
421 caltinay 4495 case DATATYPE_INT32:
422 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
423 caltinay 4495 break;
424     case DATATYPE_FLOAT32:
425 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
426 caltinay 4495 break;
427     case DATATYPE_FLOAT64:
428 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
429 caltinay 4495 break;
430     default:
431     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
432     }
433     }
434    
435     template<typename ValueType>
436     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
437 caltinay 4618 const ReaderParameters& params) const
438 caltinay 4495 {
439 caltinay 4334 // check destination function space
440     int myN0, myN1, myN2;
441     if (out.getFunctionSpace().getTypeCode() == Nodes) {
442     myN0 = m_NN[0];
443     myN1 = m_NN[1];
444     myN2 = m_NN[2];
445     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
446     out.getFunctionSpace().getTypeCode() == ReducedElements) {
447     myN0 = m_NE[0];
448     myN1 = m_NE[1];
449     myN2 = m_NE[2];
450     } else
451     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
452    
453 caltinay 4615 if (params.first.size() != 3)
454 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'first' must have 3 entries");
455    
456 caltinay 4615 if (params.numValues.size() != 3)
457 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'numValues' must have 3 entries");
458    
459 caltinay 4615 if (params.multiplier.size() != 3)
460 caltinay 4334 throw RipleyException("readBinaryGrid(): argument 'multiplier' must have 3 entries");
461 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
462     if (params.multiplier[i]<1)
463 caltinay 4334 throw RipleyException("readBinaryGrid(): all multipliers must be positive");
464    
465     // check file existence and size
466     ifstream f(filename.c_str(), ifstream::binary);
467     if (f.fail()) {
468     throw RipleyException("readBinaryGrid(): cannot open file");
469     }
470     f.seekg(0, ios::end);
471     const int numComp = out.getDataPointSize();
472     const int filesize = f.tellg();
473 caltinay 4615 const int reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
474 caltinay 4334 if (filesize < reqsize) {
475     f.close();
476     throw RipleyException("readBinaryGrid(): not enough data in file");
477     }
478    
479     // check if this rank contributes anything
480 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
481     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
482     params.first[1] >= m_offset[1]+myN1 ||
483     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
484     params.first[2] >= m_offset[2]+myN2 ||
485     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
486 caltinay 4334 f.close();
487     return;
488     }
489    
490     // now determine how much this rank has to write
491    
492     // first coordinates in data object to write to
493 caltinay 4615 const int first0 = max(0, params.first[0]-m_offset[0]);
494     const int first1 = max(0, params.first[1]-m_offset[1]);
495     const int first2 = max(0, params.first[2]-m_offset[2]);
496 caltinay 4334 // indices to first value in file
497 caltinay 4615 const int idx0 = max(0, m_offset[0]-params.first[0]);
498     const int idx1 = max(0, m_offset[1]-params.first[1]);
499     const int idx2 = max(0, m_offset[2]-params.first[2]);
500 caltinay 4334 // number of values to read
501 caltinay 4615 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
502     const int num1 = min(params.numValues[1]-idx1, myN1-first1);
503     const int num2 = min(params.numValues[2]-idx2, myN2-first2);
504 caltinay 4334
505     out.requireWrite();
506 caltinay 4495 vector<ValueType> values(num0*numComp);
507 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
508    
509 caltinay 4529 for (int z=0; z<num2; z++) {
510     for (int y=0; y<num1; y++) {
511 caltinay 4615 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
512     +(idx2+z)*params.numValues[0]*params.numValues[1]);
513 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
514     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
515 caltinay 4334
516 caltinay 4529 for (int x=0; x<num0; x++) {
517 caltinay 4615 const int baseIndex = first0+x*params.multiplier[0]
518     +(first1+y*params.multiplier[1])*myN0
519     +(first2+z*params.multiplier[2])*myN0*myN1;
520     for (int m2=0; m2<params.multiplier[2]; m2++) {
521     for (int m1=0; m1<params.multiplier[1]; m1++) {
522     for (int m0=0; m0<params.multiplier[0]; m0++) {
523 caltinay 4334 const int dataIndex = baseIndex+m0
524     +m1*myN0
525     +m2*myN0*myN1;
526     double* dest = out.getSampleDataRW(dataIndex);
527 caltinay 4529 for (int c=0; c<numComp; c++) {
528     ValueType val = values[x*numComp+c];
529    
530 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
531 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
532     // this will alter val!!
533     byte_swap32(cval);
534     }
535     if (!std::isnan(val)) {
536     for (int q=0; q<dpp; q++) {
537     *dest++ = static_cast<double>(val);
538 caltinay 4334 }
539     }
540     }
541     }
542     }
543     }
544     }
545     }
546     }
547    
548     f.close();
549     }
550    
551 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
552     int byteOrder, int dataType) const
553 caltinay 4334 {
554 caltinay 4357 // the mapping is not universally correct but should work on our
555     // supported platforms
556     switch (dataType) {
557     case DATATYPE_INT32:
558     writeBinaryGridImpl<int>(in, filename, byteOrder);
559     break;
560     case DATATYPE_FLOAT32:
561     writeBinaryGridImpl<float>(in, filename, byteOrder);
562     break;
563     case DATATYPE_FLOAT64:
564     writeBinaryGridImpl<double>(in, filename, byteOrder);
565     break;
566     default:
567     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
568     }
569     }
570    
571     template<typename ValueType>
572     void Brick::writeBinaryGridImpl(const escript::Data& in,
573     const string& filename, int byteOrder) const
574     {
575 caltinay 4334 // check function space and determine number of points
576     int myN0, myN1, myN2;
577     int totalN0, totalN1, totalN2;
578     if (in.getFunctionSpace().getTypeCode() == Nodes) {
579     myN0 = m_NN[0];
580     myN1 = m_NN[1];
581     myN2 = m_NN[2];
582     totalN0 = m_gNE[0]+1;
583     totalN1 = m_gNE[1]+1;
584     totalN2 = m_gNE[2]+1;
585     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
586     in.getFunctionSpace().getTypeCode() == ReducedElements) {
587     myN0 = m_NE[0];
588     myN1 = m_NE[1];
589     myN2 = m_NE[2];
590     totalN0 = m_gNE[0];
591     totalN1 = m_gNE[1];
592     totalN2 = m_gNE[2];
593     } else
594     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
595    
596     const int numComp = in.getDataPointSize();
597     const int dpp = in.getNumDataPointsPerSample();
598 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
599 caltinay 4334
600     if (numComp > 1 || dpp > 1)
601     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
602    
603     // from here on we know that each sample consists of one value
604 caltinay 4482 FileWriter fw;
605     fw.openFile(filename, fileSize);
606 caltinay 4334 MPIBarrier();
607    
608     for (index_t z=0; z<myN2; z++) {
609     for (index_t y=0; y<myN1; y++) {
610     const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0
611 caltinay 4357 +(m_offset[2]+z)*totalN0*totalN1)*sizeof(ValueType);
612 caltinay 4334 ostringstream oss;
613    
614     for (index_t x=0; x<myN0; x++) {
615 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
616 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
617     if (byteOrder == BYTEORDER_NATIVE) {
618 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
619     } else {
620     char* value = reinterpret_cast<char*>(&fvalue);
621 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
622 caltinay 4334 }
623     }
624 caltinay 4482 fw.writeAt(oss, fileofs);
625 caltinay 4334 }
626     }
627 caltinay 4482 fw.close();
628 caltinay 4334 }
629    
630 caltinay 3691 void Brick::dump(const string& fileName) const
631     {
632     #if USE_SILO
633     string fn(fileName);
634     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
635     fn+=".silo";
636     }
637    
638     int driver=DB_HDF5;
639     string siloPath;
640     DBfile* dbfile = NULL;
641    
642     #ifdef ESYS_MPI
643     PMPIO_baton_t* baton = NULL;
644 gross 3793 const int NUM_SILO_FILES = 1;
645     const char* blockDirFmt = "/block%04d";
646 caltinay 3691 #endif
647    
648     if (m_mpiInfo->size > 1) {
649     #ifdef ESYS_MPI
650     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
651     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
652     PMPIO_DefaultClose, (void*)&driver);
653     // try the fallback driver in case of error
654     if (!baton && driver != DB_PDB) {
655     driver = DB_PDB;
656     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
657     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
658     PMPIO_DefaultClose, (void*)&driver);
659     }
660     if (baton) {
661     char str[64];
662     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
663     siloPath = str;
664     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
665     }
666     #endif
667     } else {
668     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
669     getDescription().c_str(), driver);
670     // try the fallback driver in case of error
671     if (!dbfile && driver != DB_PDB) {
672     driver = DB_PDB;
673     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
674     getDescription().c_str(), driver);
675     }
676     }
677    
678     if (!dbfile)
679     throw RipleyException("dump: Could not create Silo file");
680    
681     /*
682     if (driver==DB_HDF5) {
683     // gzip level 1 already provides good compression with minimal
684     // performance penalty. Some tests showed that gzip levels >3 performed
685     // rather badly on escript data both in terms of time and space
686     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
687     }
688     */
689    
690 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
691     boost::scoped_ptr<double> y(new double[m_NN[1]]);
692     boost::scoped_ptr<double> z(new double[m_NN[2]]);
693 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
694     #pragma omp parallel
695     {
696     #pragma omp for
697 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
698     coords[0][i0]=getLocalCoordinate(i0, 0);
699 caltinay 3691 }
700     #pragma omp for
701 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
702     coords[1][i1]=getLocalCoordinate(i1, 1);
703 caltinay 3691 }
704     #pragma omp for
705 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
706     coords[2][i2]=getLocalCoordinate(i2, 2);
707 caltinay 3691 }
708     }
709 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
710    
711     // write mesh
712     DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 3, DB_DOUBLE,
713 caltinay 3691 DB_COLLINEAR, NULL);
714    
715 caltinay 4334 // write node ids
716     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 3,
717 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
718 caltinay 3691
719 caltinay 3698 // write element ids
720 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
721 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
722 caltinay 4334 dims, 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
723 caltinay 3698
724     // rank 0 writes multimesh and multivar
725 caltinay 3691 if (m_mpiInfo->rank == 0) {
726     vector<string> tempstrings;
727     vector<char*> names;
728     for (dim_t i=0; i<m_mpiInfo->size; i++) {
729     stringstream path;
730     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
731     tempstrings.push_back(path.str());
732     names.push_back((char*)tempstrings.back().c_str());
733     }
734     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
735     DBSetDir(dbfile, "/");
736     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
737     &types[0], NULL);
738     tempstrings.clear();
739     names.clear();
740     for (dim_t i=0; i<m_mpiInfo->size; i++) {
741     stringstream path;
742     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
743     tempstrings.push_back(path.str());
744     names.push_back((char*)tempstrings.back().c_str());
745     }
746     types.assign(m_mpiInfo->size, DB_QUADVAR);
747     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
748     &types[0], NULL);
749 caltinay 3698 tempstrings.clear();
750     names.clear();
751     for (dim_t i=0; i<m_mpiInfo->size; i++) {
752     stringstream path;
753     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
754     tempstrings.push_back(path.str());
755     names.push_back((char*)tempstrings.back().c_str());
756     }
757     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
758     &types[0], NULL);
759 caltinay 3691 }
760    
761     if (m_mpiInfo->size > 1) {
762     #ifdef ESYS_MPI
763     PMPIO_HandOffBaton(baton, dbfile);
764     PMPIO_Finish(baton);
765     #endif
766     } else {
767     DBClose(dbfile);
768     }
769    
770     #else // USE_SILO
771 caltinay 3791 throw RipleyException("dump: no Silo support");
772 caltinay 3691 #endif
773     }
774    
775     const int* Brick::borrowSampleReferenceIDs(int fsType) const
776     {
777 caltinay 3697 switch (fsType) {
778     case Nodes:
779 caltinay 3748 case ReducedNodes: //FIXME: reduced
780 caltinay 3753 return &m_nodeId[0];
781 caltinay 3757 case DegreesOfFreedom:
782     case ReducedDegreesOfFreedom: //FIXME: reduced
783 caltinay 3753 return &m_dofId[0];
784 caltinay 3697 case Elements:
785 caltinay 3733 case ReducedElements:
786 caltinay 3697 return &m_elementId[0];
787 caltinay 3757 case FaceElements:
788 caltinay 3733 case ReducedFaceElements:
789 caltinay 3697 return &m_faceId[0];
790 sshaw 4660 case Points:
791     return &m_diracPointNodeIDs[0];
792 caltinay 3697 default:
793     break;
794     }
795 caltinay 3691
796 caltinay 3697 stringstream msg;
797 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
798 caltinay 3697 throw RipleyException(msg.str());
799 caltinay 3691 }
800    
801 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
802 caltinay 3691 {
803 caltinay 3759 if (getMPISize()==1)
804     return true;
805    
806 caltinay 3757 switch (fsType) {
807     case Nodes:
808     case ReducedNodes: //FIXME: reduced
809     return (m_dofMap[id] < getNumDOF());
810     case DegreesOfFreedom:
811     case ReducedDegreesOfFreedom:
812     return true;
813     case Elements:
814     case ReducedElements:
815     {
816     // check ownership of element's _last_ node
817 caltinay 4334 const index_t x=id%m_NE[0] + 1;
818     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
819     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
820     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
821 caltinay 3757 }
822     case FaceElements:
823     case ReducedFaceElements:
824 caltinay 3759 {
825     // check ownership of face element's last node
826     dim_t n=0;
827 caltinay 4334 for (size_t i=0; i<6; i++) {
828     n+=m_faceCount[i];
829 caltinay 3759 if (id<n) {
830 caltinay 4334 const index_t j=id-n+m_faceCount[i];
831 caltinay 3759 if (i>=4) { // front or back
832 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
833     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
834 caltinay 3759 } else if (i>=2) { // bottom or top
835 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
836     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
837 caltinay 3759 } else { // left or right
838 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
839     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
840 caltinay 3759 }
841     }
842     }
843     return false;
844     }
845 caltinay 3757 default:
846     break;
847 caltinay 3753 }
848 caltinay 3757
849     stringstream msg;
850 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
851 caltinay 3757 throw RipleyException(msg.str());
852 caltinay 3691 }
853    
854 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
855 caltinay 3703 {
856 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
857     out.requireWrite();
858     #pragma omp parallel
859     {
860     if (m_faceOffset[0] > -1) {
861     #pragma omp for nowait
862 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
863     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
864     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
865 caltinay 3764 // set vector at four quadrature points
866     *o++ = -1.; *o++ = 0.; *o++ = 0.;
867     *o++ = -1.; *o++ = 0.; *o++ = 0.;
868     *o++ = -1.; *o++ = 0.; *o++ = 0.;
869     *o++ = -1.; *o++ = 0.; *o = 0.;
870     }
871     }
872     }
873    
874     if (m_faceOffset[1] > -1) {
875     #pragma omp for nowait
876 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
877     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
878     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
879 caltinay 3764 // set vector at four quadrature points
880     *o++ = 1.; *o++ = 0.; *o++ = 0.;
881     *o++ = 1.; *o++ = 0.; *o++ = 0.;
882     *o++ = 1.; *o++ = 0.; *o++ = 0.;
883     *o++ = 1.; *o++ = 0.; *o = 0.;
884     }
885     }
886     }
887    
888     if (m_faceOffset[2] > -1) {
889     #pragma omp for nowait
890 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
891     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
892     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
893 caltinay 3764 // set vector at four quadrature points
894     *o++ = 0.; *o++ = -1.; *o++ = 0.;
895     *o++ = 0.; *o++ = -1.; *o++ = 0.;
896     *o++ = 0.; *o++ = -1.; *o++ = 0.;
897     *o++ = 0.; *o++ = -1.; *o = 0.;
898     }
899     }
900     }
901    
902     if (m_faceOffset[3] > -1) {
903     #pragma omp for nowait
904 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
905     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
906     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
907 caltinay 3764 // set vector at four quadrature points
908     *o++ = 0.; *o++ = 1.; *o++ = 0.;
909     *o++ = 0.; *o++ = 1.; *o++ = 0.;
910     *o++ = 0.; *o++ = 1.; *o++ = 0.;
911     *o++ = 0.; *o++ = 1.; *o = 0.;
912     }
913     }
914     }
915    
916     if (m_faceOffset[4] > -1) {
917     #pragma omp for nowait
918 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
919     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
920     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
921 caltinay 3764 // set vector at four quadrature points
922     *o++ = 0.; *o++ = 0.; *o++ = -1.;
923     *o++ = 0.; *o++ = 0.; *o++ = -1.;
924     *o++ = 0.; *o++ = 0.; *o++ = -1.;
925     *o++ = 0.; *o++ = 0.; *o = -1.;
926     }
927     }
928     }
929    
930     if (m_faceOffset[5] > -1) {
931     #pragma omp for nowait
932 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
933     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
934     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
935 caltinay 3764 // set vector at four quadrature points
936     *o++ = 0.; *o++ = 0.; *o++ = 1.;
937     *o++ = 0.; *o++ = 0.; *o++ = 1.;
938     *o++ = 0.; *o++ = 0.; *o++ = 1.;
939     *o++ = 0.; *o++ = 0.; *o = 1.;
940     }
941     }
942     }
943     } // end of parallel section
944     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
945     out.requireWrite();
946     #pragma omp parallel
947     {
948     if (m_faceOffset[0] > -1) {
949     #pragma omp for nowait
950 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
951     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
952     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
953 caltinay 3764 *o++ = -1.;
954     *o++ = 0.;
955     *o = 0.;
956     }
957     }
958     }
959    
960     if (m_faceOffset[1] > -1) {
961     #pragma omp for nowait
962 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
963     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
964     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
965 caltinay 3764 *o++ = 1.;
966     *o++ = 0.;
967     *o = 0.;
968     }
969     }
970     }
971    
972     if (m_faceOffset[2] > -1) {
973     #pragma omp for nowait
974 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
975     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
976     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
977 caltinay 3764 *o++ = 0.;
978     *o++ = -1.;
979     *o = 0.;
980     }
981     }
982     }
983    
984     if (m_faceOffset[3] > -1) {
985     #pragma omp for nowait
986 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
987     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
988     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
989 caltinay 3764 *o++ = 0.;
990     *o++ = 1.;
991     *o = 0.;
992     }
993     }
994     }
995    
996     if (m_faceOffset[4] > -1) {
997     #pragma omp for nowait
998 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
999     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1000     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1001 caltinay 3764 *o++ = 0.;
1002     *o++ = 0.;
1003     *o = -1.;
1004     }
1005     }
1006     }
1007    
1008     if (m_faceOffset[5] > -1) {
1009     #pragma omp for nowait
1010 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1011     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1012     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1013 caltinay 3764 *o++ = 0.;
1014     *o++ = 0.;
1015     *o = 1.;
1016     }
1017     }
1018     }
1019     } // end of parallel section
1020    
1021     } else {
1022     stringstream msg;
1023 caltinay 3791 msg << "setToNormal: invalid function space type "
1024     << out.getFunctionSpace().getTypeCode();
1025 caltinay 3764 throw RipleyException(msg.str());
1026     }
1027     }
1028    
1029     void Brick::setToSize(escript::Data& out) const
1030     {
1031     if (out.getFunctionSpace().getTypeCode() == Elements
1032     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1033     out.requireWrite();
1034     const dim_t numQuad=out.getNumDataPointsPerSample();
1035 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1036 caltinay 3764 #pragma omp parallel for
1037     for (index_t k = 0; k < getNumElements(); ++k) {
1038     double* o = out.getSampleDataRW(k);
1039     fill(o, o+numQuad, size);
1040     }
1041     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1042     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1043     out.requireWrite();
1044     const dim_t numQuad=out.getNumDataPointsPerSample();
1045     #pragma omp parallel
1046     {
1047     if (m_faceOffset[0] > -1) {
1048 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1049 caltinay 3764 #pragma omp for nowait
1050 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1051     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1052     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1053 caltinay 3764 fill(o, o+numQuad, size);
1054     }
1055     }
1056     }
1057    
1058     if (m_faceOffset[1] > -1) {
1059 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1060 caltinay 3764 #pragma omp for nowait
1061 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1062     for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1063     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1064 caltinay 3764 fill(o, o+numQuad, size);
1065     }
1066     }
1067     }
1068    
1069     if (m_faceOffset[2] > -1) {
1070 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1071 caltinay 3764 #pragma omp for nowait
1072 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1073     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1074     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1075 caltinay 3764 fill(o, o+numQuad, size);
1076     }
1077     }
1078     }
1079    
1080     if (m_faceOffset[3] > -1) {
1081 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1082 caltinay 3764 #pragma omp for nowait
1083 caltinay 4334 for (index_t k2 = 0; k2 < m_NE[2]; ++k2) {
1084     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1085     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1086 caltinay 3764 fill(o, o+numQuad, size);
1087     }
1088     }
1089     }
1090    
1091     if (m_faceOffset[4] > -1) {
1092 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1093 caltinay 3764 #pragma omp for nowait
1094 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1095     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1096     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1097 caltinay 3764 fill(o, o+numQuad, size);
1098     }
1099     }
1100     }
1101    
1102     if (m_faceOffset[5] > -1) {
1103 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1104 caltinay 3764 #pragma omp for nowait
1105 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
1106     for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
1107     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1108 caltinay 3764 fill(o, o+numQuad, size);
1109     }
1110     }
1111     }
1112     } // end of parallel section
1113    
1114     } else {
1115     stringstream msg;
1116 caltinay 3791 msg << "setToSize: invalid function space type "
1117     << out.getFunctionSpace().getTypeCode();
1118 caltinay 3764 throw RipleyException(msg.str());
1119     }
1120     }
1121    
1122     void Brick::Print_Mesh_Info(const bool full) const
1123     {
1124     RipleyDomain::Print_Mesh_Info(full);
1125     if (full) {
1126     cout << " Id Coordinates" << endl;
1127     cout.precision(15);
1128     cout.setf(ios::scientific, ios::floatfield);
1129     for (index_t i=0; i < getNumNodes(); i++) {
1130     cout << " " << setw(5) << m_nodeId[i]
1131 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1132     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1133     << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << endl;
1134 caltinay 3764 }
1135     }
1136     }
1137    
1138    
1139     //protected
1140     void Brick::assembleCoordinates(escript::Data& arg) const
1141     {
1142     escriptDataC x = arg.getDataC();
1143     int numDim = m_numDim;
1144     if (!isDataPointShapeEqual(&x, 1, &numDim))
1145     throw RipleyException("setToX: Invalid Data object shape");
1146     if (!numSamplesEqual(&x, 1, getNumNodes()))
1147     throw RipleyException("setToX: Illegal number of samples in Data object");
1148    
1149     arg.requireWrite();
1150     #pragma omp parallel for
1151 caltinay 4334 for (dim_t i2 = 0; i2 < m_NN[2]; i2++) {
1152     for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1153     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1154     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1+m_NN[0]*m_NN[1]*i2);
1155     point[0] = getLocalCoordinate(i0, 0);
1156     point[1] = getLocalCoordinate(i1, 1);
1157     point[2] = getLocalCoordinate(i2, 2);
1158 caltinay 3764 }
1159     }
1160     }
1161     }
1162    
1163     //protected
1164 caltinay 4626 void Brick::assembleGradient(escript::Data& out, const escript::Data& in) const
1165 caltinay 3764 {
1166 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1167 caltinay 3731 const double C0 = .044658198738520451079;
1168     const double C1 = .16666666666666666667;
1169     const double C2 = .21132486540518711775;
1170     const double C3 = .25;
1171     const double C4 = .5;
1172     const double C5 = .62200846792814621559;
1173     const double C6 = .78867513459481288225;
1174    
1175 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1176 caltinay 3760 out.requireWrite();
1177 caltinay 3913 #pragma omp parallel
1178     {
1179     vector<double> f_000(numComp);
1180     vector<double> f_001(numComp);
1181     vector<double> f_010(numComp);
1182     vector<double> f_011(numComp);
1183     vector<double> f_100(numComp);
1184     vector<double> f_101(numComp);
1185     vector<double> f_110(numComp);
1186     vector<double> f_111(numComp);
1187     #pragma omp for
1188 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1189     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1190     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1191     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1192     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1193     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1194     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1195     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1196     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1197     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1198     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1199     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1200 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1201 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1202     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1203     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1204     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1205     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1206     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1207     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1208     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1209     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1210     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1211     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1212     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1213 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1214     o[INDEX3(i,1,0,numComp,3)] = V4;
1215     o[INDEX3(i,2,0,numComp,3)] = V8;
1216     o[INDEX3(i,0,1,numComp,3)] = V0;
1217     o[INDEX3(i,1,1,numComp,3)] = V5;
1218     o[INDEX3(i,2,1,numComp,3)] = V9;
1219     o[INDEX3(i,0,2,numComp,3)] = V1;
1220     o[INDEX3(i,1,2,numComp,3)] = V4;
1221     o[INDEX3(i,2,2,numComp,3)] = V10;
1222     o[INDEX3(i,0,3,numComp,3)] = V1;
1223     o[INDEX3(i,1,3,numComp,3)] = V5;
1224     o[INDEX3(i,2,3,numComp,3)] = V11;
1225     o[INDEX3(i,0,4,numComp,3)] = V2;
1226     o[INDEX3(i,1,4,numComp,3)] = V6;
1227     o[INDEX3(i,2,4,numComp,3)] = V8;
1228     o[INDEX3(i,0,5,numComp,3)] = V2;
1229     o[INDEX3(i,1,5,numComp,3)] = V7;
1230     o[INDEX3(i,2,5,numComp,3)] = V9;
1231     o[INDEX3(i,0,6,numComp,3)] = V3;
1232     o[INDEX3(i,1,6,numComp,3)] = V6;
1233     o[INDEX3(i,2,6,numComp,3)] = V10;
1234     o[INDEX3(i,0,7,numComp,3)] = V3;
1235     o[INDEX3(i,1,7,numComp,3)] = V7;
1236     o[INDEX3(i,2,7,numComp,3)] = V11;
1237     } // end of component loop i
1238     } // end of k0 loop
1239     } // end of k1 loop
1240     } // end of k2 loop
1241     } // end of parallel section
1242 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1243 caltinay 3760 out.requireWrite();
1244 caltinay 3913 #pragma omp parallel
1245     {
1246     vector<double> f_000(numComp);
1247     vector<double> f_001(numComp);
1248     vector<double> f_010(numComp);
1249     vector<double> f_011(numComp);
1250     vector<double> f_100(numComp);
1251     vector<double> f_101(numComp);
1252     vector<double> f_110(numComp);
1253     vector<double> f_111(numComp);
1254     #pragma omp for
1255 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1256     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1257     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1258     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1259     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1260     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1261     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1262     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1263     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1264     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1265     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1266     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE[0],m_NE[1]));
1267 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1268 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1269     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1270     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1271 caltinay 3913 } // end of component loop i
1272     } // end of k0 loop
1273     } // end of k1 loop
1274     } // end of k2 loop
1275     } // end of parallel section
1276 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1277 caltinay 3760 out.requireWrite();
1278 caltinay 3722 #pragma omp parallel
1279     {
1280 caltinay 3913 vector<double> f_000(numComp);
1281     vector<double> f_001(numComp);
1282     vector<double> f_010(numComp);
1283     vector<double> f_011(numComp);
1284     vector<double> f_100(numComp);
1285     vector<double> f_101(numComp);
1286     vector<double> f_110(numComp);
1287     vector<double> f_111(numComp);
1288 caltinay 3722 if (m_faceOffset[0] > -1) {
1289     #pragma omp for nowait
1290 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1291     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1292     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1293     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1294     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1295     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1296     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1297     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1298     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1299     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1300     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1301 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1302 caltinay 4375 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / m_dx[1];
1303     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / m_dx[1];
1304     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / m_dx[2];
1305     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / m_dx[2];
1306     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1307 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1308     o[INDEX3(i,2,0,numComp,3)] = V2;
1309 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1310 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1311     o[INDEX3(i,2,1,numComp,3)] = V3;
1312 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1313 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1314     o[INDEX3(i,2,2,numComp,3)] = V2;
1315 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1316 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1317     o[INDEX3(i,2,3,numComp,3)] = V3;
1318 caltinay 3764 } // end of component loop i
1319     } // end of k1 loop
1320     } // end of k2 loop
1321     } // end of face 0
1322 caltinay 3722 if (m_faceOffset[1] > -1) {
1323     #pragma omp for nowait
1324 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1325     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1326     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1327     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1328     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1329     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1330     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1331     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1332     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1333     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1334     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1335 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1336 caltinay 4375 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1337     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1338     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1339     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1340     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1341 caltinay 3731 o[INDEX3(i,1,0,numComp,3)] = V0;
1342     o[INDEX3(i,2,0,numComp,3)] = V2;
1343 caltinay 4375 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1344 caltinay 3731 o[INDEX3(i,1,1,numComp,3)] = V0;
1345     o[INDEX3(i,2,1,numComp,3)] = V3;
1346 caltinay 4375 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1347 caltinay 3731 o[INDEX3(i,1,2,numComp,3)] = V1;
1348     o[INDEX3(i,2,2,numComp,3)] = V2;
1349 caltinay 4375 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1350 caltinay 3731 o[INDEX3(i,1,3,numComp,3)] = V1;
1351     o[INDEX3(i,2,3,numComp,3)] = V3;
1352 caltinay 3764 } // end of component loop i
1353     } // end of k1 loop
1354     } // end of k2 loop
1355     } // end of face 1
1356 caltinay 3722 if (m_faceOffset[2] > -1) {
1357     #pragma omp for nowait
1358 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1359     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1360     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1361     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1362     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1363     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1364     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,0,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1365     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1366     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1367     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1368     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1369 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1370 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / m_dx[0];
1371     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / m_dx[2];
1372     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / m_dx[2];
1373 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1374 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1375 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V1;
1376     o[INDEX3(i,0,1,numComp,3)] = V0;
1377 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1378 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V2;
1379     o[INDEX3(i,0,2,numComp,3)] = V0;
1380 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1381 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V1;
1382     o[INDEX3(i,0,3,numComp,3)] = V0;
1383 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1384 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V2;
1385 caltinay 3764 } // end of component loop i
1386     } // end of k0 loop
1387     } // end of k2 loop
1388     } // end of face 2
1389 caltinay 3722 if (m_faceOffset[3] > -1) {
1390     #pragma omp for nowait
1391 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1392     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1393     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1394     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1395     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1396     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1397     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1398     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-2,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1399     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1400     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,m_NN[1]-1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1401     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1402 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1403 caltinay 4375 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1404     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1405     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / m_dx[2];
1406     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / m_dx[2];
1407 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1408 caltinay 4375 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1409 caltinay 3731 o[INDEX3(i,2,0,numComp,3)] = V2;
1410     o[INDEX3(i,0,1,numComp,3)] = V0;
1411 caltinay 4375 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1412 caltinay 3731 o[INDEX3(i,2,1,numComp,3)] = V3;
1413     o[INDEX3(i,0,2,numComp,3)] = V1;
1414 caltinay 4375 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1415 caltinay 3731 o[INDEX3(i,2,2,numComp,3)] = V2;
1416     o[INDEX3(i,0,3,numComp,3)] = V1;
1417 caltinay 4375 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1418 caltinay 3731 o[INDEX3(i,2,3,numComp,3)] = V3;
1419 caltinay 3764 } // end of component loop i
1420     } // end of k0 loop
1421     } // end of k2 loop
1422     } // end of face 3
1423 caltinay 3722 if (m_faceOffset[4] > -1) {
1424     #pragma omp for nowait
1425 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1426     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1427     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1428     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1429     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1430     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1431     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1432     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1433     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_NN[0],m_NN[1])), numComp*sizeof(double));
1434     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1435     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1436 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1437 caltinay 4375 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / m_dx[0];
1438     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / m_dx[0];
1439     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / m_dx[1];
1440     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / m_dx[1];
1441 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1442     o[INDEX3(i,1,0,numComp,3)] = V2;
1443 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1444 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1445     o[INDEX3(i,1,1,numComp,3)] = V3;
1446 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1447 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1448     o[INDEX3(i,1,2,numComp,3)] = V2;
1449 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1450 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1451     o[INDEX3(i,1,3,numComp,3)] = V3;
1452 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1453 caltinay 3764 } // end of component loop i
1454     } // end of k0 loop
1455     } // end of k1 loop
1456     } // end of face 4
1457 caltinay 3722 if (m_faceOffset[5] > -1) {
1458     #pragma omp for nowait
1459 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1460     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1461     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1462     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1463     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1464     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1465     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1466     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1467     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1468     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,m_NN[2]-1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1469     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1470 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1471 caltinay 4375 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / m_dx[0];
1472     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / m_dx[0];
1473     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / m_dx[1];
1474     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / m_dx[1];
1475 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = V0;
1476     o[INDEX3(i,1,0,numComp,3)] = V2;
1477 caltinay 4375 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1478 caltinay 3731 o[INDEX3(i,0,1,numComp,3)] = V0;
1479     o[INDEX3(i,1,1,numComp,3)] = V3;
1480 caltinay 4375 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1481 caltinay 3731 o[INDEX3(i,0,2,numComp,3)] = V1;
1482     o[INDEX3(i,1,2,numComp,3)] = V2;
1483 caltinay 4375 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1484 caltinay 3731 o[INDEX3(i,0,3,numComp,3)] = V1;
1485     o[INDEX3(i,1,3,numComp,3)] = V3;
1486 caltinay 4375 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1487 caltinay 3764 } // end of component loop i
1488     } // end of k0 loop
1489     } // end of k1 loop
1490     } // end of face 5
1491 caltinay 3722 } // end of parallel section
1492 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1493 caltinay 3760 out.requireWrite();
1494 caltinay 3722 #pragma omp parallel
1495     {
1496 caltinay 3913 vector<double> f_000(numComp);
1497     vector<double> f_001(numComp);
1498     vector<double> f_010(numComp);
1499     vector<double> f_011(numComp);
1500     vector<double> f_100(numComp);
1501     vector<double> f_101(numComp);
1502     vector<double> f_110(numComp);
1503     vector<double> f_111(numComp);
1504 caltinay 3722 if (m_faceOffset[0] > -1) {
1505     #pragma omp for nowait
1506 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1507     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1508     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1509     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1510     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1511     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1512     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1513     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1514     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1515     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1516     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1517 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1518 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1519     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / m_dx[1];
1520     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / m_dx[2];
1521 caltinay 3764 } // end of component loop i
1522     } // end of k1 loop
1523     } // end of k2 loop
1524     } // end of face 0
1525 caltinay 3722 if (m_faceOffset[1] > -1) {
1526     #pragma omp for nowait
1527 caltinay 4334 for (index_t k2=0; k2 < m_NE[2]; ++k2) {
1528     for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1529     memcpy(&f_000[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1530     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1531     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1532     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(m_NN[0]-2,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1533     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1534     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1535     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2, m_NN[0],m_NN[1])), numComp*sizeof(double));
1536     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(m_NN[0]-1,k1+1,k2+1, m_NN[0],m_NN[1])), numComp*sizeof(double));
1537     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1538 caltinay 3722 for (index_t i=0; i < numComp; ++i) {
1539 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1540     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / m_dx