/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6651 - (hide annotations)
Wed Feb 7 02:12:08 2018 UTC (14 months, 1 week ago) by jfenwick
File size: 188494 byte(s)
Make everyone sad by touching all the files

Copyright dates update

1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 6651 * Copyright (c) 2003-2018 by The University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 caltinay 3691 *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 caltinay 3691
17     #include <ripley/Brick.h>
18 sshaw 4622 #include <ripley/DefaultAssembler3D.h>
19 caltinay 5115 #include <ripley/LameAssembler3D.h>
20 sshaw 4629 #include <ripley/WaveAssembler3D.h>
21 caltinay 5046 #include <ripley/blocktools.h>
22 sshaw 4712 #include <ripley/domainhelpers.h>
23 caltinay 5046
24 caltinay 5968 #include <escript/FileWriter.h>
25 caltinay 6169 #include <escript/index.h>
26 caltinay 5958 #include <escript/Random.h>
27    
28 caltinay 6169 #ifdef ESYS_HAVE_PASO
29 caltinay 5958 #include <paso/SystemMatrix.h>
30 caltinay 6169 #endif
31 caltinay 5958
32 caltinay 6144 #ifdef ESYS_HAVE_NETCDF
33 jfenwick 6505 #ifdef NETCDF4
34     #include <ncVar.h>
35     #include <ncDim.h>
36     #include <escript/NCHelper.h>
37    
38     #else
39     #include <netcdfcpp.h>
40     #endif
41 caltinay 4013 #endif
42    
43 jfenwick 6505
44    
45    
46 caltinay 6144 #ifdef ESYS_HAVE_SILO
47 caltinay 3691 #include <silo.h>
48     #ifdef ESYS_MPI
49     #include <pmpio.h>
50     #endif
51     #endif
52    
53 caltinay 5532 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
54     #include <boost/scoped_array.hpp>
55    
56 caltinay 3691 #include <iomanip>
57 caltinay 5046 #include <limits>
58 caltinay 3691
59 caltinay 5120 namespace bp = boost::python;
60 jfenwick 5535 namespace bm = boost::math;
61 caltinay 5114 using escript::AbstractSystemMatrix;
62 caltinay 5968 using escript::FileWriter;
63 caltinay 5986 using escript::NotImplementedError;
64     using escript::ValueError;
65 jfenwick 5525 using std::vector;
66     using std::string;
67     using std::min;
68     using std::max;
69     using std::ios;
70     using std::fill;
71 caltinay 3691
72     namespace ripley {
73    
74 caltinay 5120 inline int indexOfMax(dim_t a, dim_t b, dim_t c)
75 caltinay 5117 {
76 sshaw 4712 if (a > b) {
77     if (c > a) {
78     return 2;
79     }
80     return 0;
81     } else if (b > c) {
82     return 1;
83     }
84     return 2;
85     }
86    
87 caltinay 5120 Brick::Brick(dim_t n0, dim_t n1, dim_t n2, double x0, double y0, double z0,
88 sshaw 4622 double x1, double y1, double z1, int d0, int d1, int d2,
89 caltinay 5115 const vector<double>& points, const vector<int>& tags,
90 caltinay 5148 const TagMap& tagnamestonums,
91 jfenwick 4934 escript::SubWorld_ptr w) :
92     RipleyDomain(3, w)
93 caltinay 3691 {
94 caltinay 5119 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
95 jfenwick 5525 * static_cast<long>(n2 + 1) > std::numeric_limits<dim_t>::max())
96 sshaw 4861 throw RipleyException("The number of elements has overflowed, this "
97     "limit may be raised in future releases.");
98    
99 sshaw 4851 if (n0 <= 0 || n1 <= 0 || n2 <= 0)
100 caltinay 5986 throw ValueError("Number of elements in each spatial dimension "
101 sshaw 4851 "must be positive");
102    
103 caltinay 3943 // ignore subdivision parameters for serial run
104     if (m_mpiInfo->size == 1) {
105     d0=1;
106     d1=1;
107     d2=1;
108     }
109     bool warn=false;
110 sshaw 4712
111 caltinay 5115 vector<int> factors;
112 sshaw 4712 int ranks = m_mpiInfo->size;
113 caltinay 5120 dim_t epr[3] = {n0,n1,n2};
114 sshaw 4712 int d[3] = {d0,d1,d2};
115     if (d0<=0 || d1<=0 || d2<=0) {
116     for (int i = 0; i < 3; i++) {
117     if (d[i] < 1) {
118     d[i] = 1;
119     continue;
120 caltinay 3943 }
121 sshaw 4712 epr[i] = -1; // can no longer be max
122     if (ranks % d[i] != 0) {
123 caltinay 5986 throw ValueError("Invalid number of spatial subdivisions");
124 caltinay 3943 }
125 caltinay 5119 //remove
126 sshaw 4712 ranks /= d[i];
127 caltinay 3943 }
128 sshaw 4712 factorise(factors, ranks);
129     if (factors.size() != 0) {
130     warn = true;
131 caltinay 3943 }
132 caltinay 5119 }
133 sshaw 4712 while (factors.size() > 0) {
134     int i = indexOfMax(epr[0],epr[1],epr[2]);
135     int f = factors.back();
136     factors.pop_back();
137     d[i] *= f;
138     epr[i] /= f;
139 caltinay 3943 }
140 sshaw 4712 d0 = d[0]; d1 = d[1]; d2 = d[2];
141 caltinay 3943
142 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
143     // among number of ranks
144 sshaw 4712 if (d0*d1*d2 != m_mpiInfo->size){
145 caltinay 5986 throw ValueError("Invalid number of spatial subdivisions");
146 sshaw 4712 }
147 caltinay 3943 if (warn) {
148 jfenwick 5525 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
149     << d1 << ", d2=" << d2 << "). This may not be optimal!" << std::endl;
150 caltinay 3943 }
151 caltinay 3691
152 caltinay 4334 double l0 = x1-x0;
153     double l1 = y1-y0;
154     double l2 = z1-z0;
155 caltinay 6479 m_dx[0] = l0/n0;
156     m_dx[1] = l1/n1;
157     m_dx[2] = l2/n2;
158 caltinay 4334
159 caltinay 6397 warn = false;
160 caltinay 4334 if ((n0+1)%d0 > 0) {
161 caltinay 6397 switch (getDecompositionPolicy()) {
162     case DECOMP_EXPAND:
163     l0 = m_dx[0]*n0; // fall through
164     case DECOMP_ADD_ELEMENTS:
165     n0 = (dim_t)round((float)(n0+1)/d0+0.5)*d0-1; // fall through
166     case DECOMP_STRICT:
167     warn = true;
168     break;
169     }
170 caltinay 6477 // reset spacing
171     m_dx[0] = l0/n0;
172 caltinay 3943 }
173 caltinay 4334 if ((n1+1)%d1 > 0) {
174 caltinay 6397 switch (getDecompositionPolicy()) {
175     case DECOMP_EXPAND:
176     l1 = m_dx[1]*n1; // fall through
177     case DECOMP_ADD_ELEMENTS:
178     n1 = (dim_t)round((float)(n1+1)/d1+0.5)*d1-1; // fall through
179     case DECOMP_STRICT:
180     warn = true;
181     break;
182     }
183 caltinay 6477 // reset spacing
184     m_dx[1] = l1/n1;
185 caltinay 3943 }
186 caltinay 4334 if ((n2+1)%d2 > 0) {
187 caltinay 6397 switch (getDecompositionPolicy()) {
188     case DECOMP_EXPAND:
189     l2 = m_dx[2]*n2; // fall through
190     case DECOMP_ADD_ELEMENTS:
191     n2 = (dim_t)round((float)(n2+1)/d2+0.5)*d2-1; // fall through
192     case DECOMP_STRICT:
193     warn = true;
194     break;
195     }
196 caltinay 6477 // reset spacing
197     m_dx[2] = l2/n2;
198 caltinay 3943 }
199    
200 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2) || (d2 > 1 && (n2+1)/d2<2))
201 caltinay 5986 throw ValueError("Too few elements for the number of ranks");
202 caltinay 3753
203 caltinay 6397 if (warn) {
204     if (getDecompositionPolicy() == DECOMP_STRICT) {
205     throw ValueError("Unable to decompose domain to the number of "
206     "MPI ranks without adding elements and the policy "
207     "is set to STRICT. Use setDecompositionPolicy() "
208     "to allow adding elements.");
209     } else {
210     std::cout << "Warning: Domain setup has been adjusted as follows "
211     "to allow decomposition into " << m_mpiInfo->size
212     << " MPI ranks:" << std::endl
213     << " N0=" << n0 << ", l0=" << l0 << std::endl
214     << " N1=" << n1 << ", l1=" << l1 << std::endl
215     << " N2=" << n2 << ", l2=" << l1 << std::endl;
216     }
217     }
218 caltinay 4334 m_gNE[0] = n0;
219     m_gNE[1] = n1;
220     m_gNE[2] = n2;
221     m_origin[0] = x0;
222     m_origin[1] = y0;
223     m_origin[2] = z0;
224     m_length[0] = l0;
225     m_length[1] = l1;
226     m_length[2] = l2;
227     m_NX[0] = d0;
228     m_NX[1] = d1;
229     m_NX[2] = d2;
230    
231 caltinay 3753 // local number of elements (including overlap)
232 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
233     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
234     m_NE[0]++;
235     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
236     m_ownNE[0]--;
237 caltinay 3764
238 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
239     if (m_mpiInfo->rank%(d0*d1)/d0>0 && m_mpiInfo->rank%(d0*d1)/d0<d1-1)
240     m_NE[1]++;
241     else if (d1>1 && m_mpiInfo->rank%(d0*d1)/d0==d1-1)
242     m_ownNE[1]--;
243 caltinay 3764
244 caltinay 4334 m_NE[2] = m_ownNE[2] = (d2>1 ? (n2+1)/d2 : n2);
245     if (m_mpiInfo->rank/(d0*d1)>0 && m_mpiInfo->rank/(d0*d1)<d2-1)
246     m_NE[2]++;
247     else if (d2>1 && m_mpiInfo->rank/(d0*d1)==d2-1)
248     m_ownNE[2]--;
249 caltinay 3753
250     // local number of nodes
251 caltinay 4334 m_NN[0] = m_NE[0]+1;
252     m_NN[1] = m_NE[1]+1;
253     m_NN[2] = m_NE[2]+1;
254 caltinay 3753
255 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
256 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
257     if (m_offset[0] > 0)
258     m_offset[0]--;
259     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank%(d0*d1)/d0);
260     if (m_offset[1] > 0)
261     m_offset[1]--;
262     m_offset[2] = (n2+1)/d2*(m_mpiInfo->rank/(d0*d1));
263     if (m_offset[2] > 0)
264     m_offset[2]--;
265 caltinay 3753
266 caltinay 3691 populateSampleIds();
267 caltinay 5119
268 caltinay 5148 for (TagMap::const_iterator i = tagnamestonums.begin();
269 sshaw 4629 i != tagnamestonums.end(); i++) {
270     setTagMap(i->first, i->second);
271     }
272 caltinay 5097 addPoints(points, tags);
273 caltinay 3691 }
274    
275     Brick::~Brick()
276     {
277     }
278    
279     string Brick::getDescription() const
280     {
281     return "ripley::Brick";
282     }
283    
284 sshaw 5736 bool Brick::operator==(const escript::AbstractDomain& other) const
285 caltinay 3691 {
286 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
287     if (o) {
288     return (RipleyDomain::operator==(other) &&
289 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] && m_gNE[2]==o->m_gNE[2]
290     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] && m_origin[2]==o->m_origin[2]
291     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] && m_length[2]==o->m_length[2]
292     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1] && m_NX[2]==o->m_NX[2]);
293 caltinay 3744 }
294 caltinay 3691
295     return false;
296     }
297    
298 jfenwick 6505 #ifdef NETCDF4
299    
300 caltinay 4013 void Brick::readNcGrid(escript::Data& out, string filename, string varname,
301 caltinay 4618 const ReaderParameters& params) const
302 caltinay 4013 {
303 caltinay 6144 #ifdef ESYS_HAVE_NETCDF
304 jfenwick 6505 using namespace netCDF;
305 caltinay 4013 // check destination function space
306 caltinay 5120 dim_t myN0, myN1, myN2;
307 caltinay 4013 if (out.getFunctionSpace().getTypeCode() == Nodes) {
308 caltinay 4334 myN0 = m_NN[0];
309     myN1 = m_NN[1];
310     myN2 = m_NN[2];
311 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
312     out.getFunctionSpace().getTypeCode() == ReducedElements) {
313 caltinay 4334 myN0 = m_NE[0];
314     myN1 = m_NE[1];
315     myN2 = m_NE[2];
316 caltinay 4013 } else
317 caltinay 5986 throw ValueError("readNcGrid(): invalid function space for output data object");
318 caltinay 4013
319 caltinay 4615 if (params.first.size() != 3)
320 caltinay 5986 throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
321 caltinay 4013
322 caltinay 4615 if (params.numValues.size() != 3)
323 caltinay 5986 throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
324 caltinay 4013
325 caltinay 4615 if (params.multiplier.size() != 3)
326 caltinay 5986 throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
327 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
328     if (params.multiplier[i]<1)
329 caltinay 5986 throw ValueError("readNcGrid(): all multipliers must be positive");
330 caltinay 4277
331 caltinay 4013 // check file existence and size
332 jfenwick 6505
333     NcFile f;
334 jfenwick 6618 if (!escript::openNcFile(f, filename))
335 jfenwick 6505 {
336     throw RipleyException("readNcGrid(): cannot open file");
337     }
338     NcVar var = f.getVar(varname.c_str());
339     if (var.isNull())
340     throw RipleyException("readNcGrid(): invalid variable name");
341    
342     // TODO: rank>0 data support
343     const int numComp = out.getDataPointSize();
344     if (numComp > 1)
345     throw RipleyException("readNcGrid(): only scalar data supported");
346    
347     const int dims = var.getDimCount();
348     vector<long> edges(dims);
349     std::vector< NcDim > vard=var.getDims();
350     for (size_t i=0;i<vard.size();++i)
351     {
352     edges[i]=vard[i].getSize();
353     }
354    
355     // is this a slice of the data object (dims!=3)?
356     // note the expected ordering of edges (as in numpy: z,y,x)
357     if ( (dims==3 && (params.numValues[2] > edges[0] ||
358     params.numValues[1] > edges[1] ||
359     params.numValues[0] > edges[2]))
360     || (dims==2 && params.numValues[2]>1)
361     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
362     throw RipleyException("readNcGrid(): not enough data in file");
363     }
364    
365     // check if this rank contributes anything
366     if (params.first[0] >= m_offset[0]+myN0 ||
367     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
368     params.first[1] >= m_offset[1]+myN1 ||
369     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
370     params.first[2] >= m_offset[2]+myN2 ||
371     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
372     return;
373     }
374    
375     // now determine how much this rank has to write
376    
377     // first coordinates in data object to write to
378     const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
379     const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
380     const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
381     // indices to first value in file (not accounting for reverse yet)
382     dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
383     dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
384     dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
385     // number of values to read
386     const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
387     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
388     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
389    
390     // make sure we read the right block if going backwards through file
391     if (params.reverse[0])
392     idx0 = edges[dims-1]-num0-idx0;
393     if (dims>1 && params.reverse[1])
394     idx1 = edges[dims-2]-num1-idx1;
395     if (dims>2 && params.reverse[2])
396     idx2 = edges[dims-3]-num2-idx2;
397    
398    
399     vector<double> values(num0*num1*num2);
400     vector<size_t> startindex;
401     vector<size_t> counts;
402     if (dims==3) {
403     //var->set_cur(idx2, idx1, idx0); // from old API
404     startindex.push_back(idx2);
405     startindex.push_back(idx1);
406     startindex.push_back(idx0);
407     counts.push_back(num2);
408     counts.push_back(num1);
409     counts.push_back(num0);
410     var.getVar(startindex, counts, &values[0]);
411     } else if (dims==2) {
412     // var->set_cur(idx1, idx0); // from old API
413     startindex.push_back(idx1);
414     startindex.push_back(idx0);
415     counts.push_back(num1);
416     counts.push_back(num0);
417     var.getVar(startindex, counts, &values[0]);
418     } else {
419     //var->set_cur(idx0);
420     //var->get(&values[0], num0);
421     startindex.push_back(idx0);
422     counts.push_back(num0);
423     var.getVar(startindex, counts, &values[0]);
424     }
425    
426     const int dpp = out.getNumDataPointsPerSample();
427     out.requireWrite();
428    
429     // helpers for reversing
430     const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
431     const int x_mult = (params.reverse[0] ? -1 : 1);
432     const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
433     const int y_mult = (params.reverse[1] ? -1 : 1);
434     const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
435     const int z_mult = (params.reverse[2] ? -1 : 1);
436    
437     for (index_t z=0; z<num2; z++) {
438     for (index_t y=0; y<num1; y++) {
439     #pragma omp parallel for
440     for (index_t x=0; x<num0; x++) {
441     const dim_t baseIndex = first0+x*params.multiplier[0]
442     +(first1+y*params.multiplier[1])*myN0
443     +(first2+z*params.multiplier[2])*myN0*myN1;
444     const dim_t srcIndex=(z0+z_mult*z)*num1*num0
445     +(y0+y_mult*y)*num0
446     +(x0+x_mult*x);
447     if (!bm::isnan(values[srcIndex])) {
448     for (index_t m2=0; m2<params.multiplier[2]; m2++) {
449     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
450     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
451     const dim_t dataIndex = baseIndex+m0
452     +m1*myN0
453     +m2*myN0*myN1;
454     double* dest = out.getSampleDataRW(dataIndex);
455     for (index_t q=0; q<dpp; q++) {
456     *dest++ = values[srcIndex];
457     }
458     }
459     }
460     }
461     }
462     }
463     }
464     }
465     #else
466     throw RipleyException("readNcGrid(): not compiled with netCDF support");
467     #endif
468     }
469    
470    
471     #else
472    
473     void Brick::readNcGrid(escript::Data& out, string filename, string varname,
474     const ReaderParameters& params) const
475     {
476     #ifdef ESYS_HAVE_NETCDF
477     // check destination function space
478     dim_t myN0, myN1, myN2;
479     if (out.getFunctionSpace().getTypeCode() == Nodes) {
480     myN0 = m_NN[0];
481     myN1 = m_NN[1];
482     myN2 = m_NN[2];
483     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
484     out.getFunctionSpace().getTypeCode() == ReducedElements) {
485     myN0 = m_NE[0];
486     myN1 = m_NE[1];
487     myN2 = m_NE[2];
488     } else
489     throw ValueError("readNcGrid(): invalid function space for output data object");
490    
491     if (params.first.size() != 3)
492     throw ValueError("readNcGrid(): argument 'first' must have 3 entries");
493    
494     if (params.numValues.size() != 3)
495     throw ValueError("readNcGrid(): argument 'numValues' must have 3 entries");
496    
497     if (params.multiplier.size() != 3)
498     throw ValueError("readNcGrid(): argument 'multiplier' must have 3 entries");
499     for (size_t i=0; i<params.multiplier.size(); i++)
500     if (params.multiplier[i]<1)
501     throw ValueError("readNcGrid(): all multipliers must be positive");
502    
503     // check file existence and size
504 caltinay 4013 NcFile f(filename.c_str(), NcFile::ReadOnly);
505     if (!f.is_valid())
506     throw RipleyException("readNcGrid(): cannot open file");
507    
508     NcVar* var = f.get_var(varname.c_str());
509     if (!var)
510     throw RipleyException("readNcGrid(): invalid variable name");
511    
512     // TODO: rank>0 data support
513     const int numComp = out.getDataPointSize();
514     if (numComp > 1)
515     throw RipleyException("readNcGrid(): only scalar data supported");
516    
517     const int dims = var->num_dims();
518 caltinay 4477 boost::scoped_array<long> edges(var->edges());
519 caltinay 4013
520     // is this a slice of the data object (dims!=3)?
521     // note the expected ordering of edges (as in numpy: z,y,x)
522 caltinay 4615 if ( (dims==3 && (params.numValues[2] > edges[0] ||
523     params.numValues[1] > edges[1] ||
524     params.numValues[0] > edges[2]))
525     || (dims==2 && params.numValues[2]>1)
526     || (dims==1 && (params.numValues[2]>1 || params.numValues[1]>1)) ) {
527 caltinay 4013 throw RipleyException("readNcGrid(): not enough data in file");
528     }
529    
530     // check if this rank contributes anything
531 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
532     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
533     params.first[1] >= m_offset[1]+myN1 ||
534     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
535     params.first[2] >= m_offset[2]+myN2 ||
536     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
537 caltinay 4013 return;
538     }
539    
540     // now determine how much this rank has to write
541    
542     // first coordinates in data object to write to
543 caltinay 5187 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
544     const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
545     const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
546 caltinay 4618 // indices to first value in file (not accounting for reverse yet)
547 caltinay 5187 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
548     dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
549     dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
550 caltinay 4277 // number of values to read
551 caltinay 5120 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
552     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
553     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
554 caltinay 4013
555 caltinay 4618 // make sure we read the right block if going backwards through file
556     if (params.reverse[0])
557     idx0 = edges[dims-1]-num0-idx0;
558     if (dims>1 && params.reverse[1])
559     idx1 = edges[dims-2]-num1-idx1;
560     if (dims>2 && params.reverse[2])
561     idx2 = edges[dims-3]-num2-idx2;
562    
563    
564 caltinay 4013 vector<double> values(num0*num1*num2);
565     if (dims==3) {
566     var->set_cur(idx2, idx1, idx0);
567     var->get(&values[0], num2, num1, num0);
568     } else if (dims==2) {
569     var->set_cur(idx1, idx0);
570     var->get(&values[0], num1, num0);
571     } else {
572     var->set_cur(idx0);
573     var->get(&values[0], num0);
574     }
575    
576     const int dpp = out.getNumDataPointsPerSample();
577     out.requireWrite();
578    
579 caltinay 4618 // helpers for reversing
580 caltinay 5120 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
581 caltinay 4618 const int x_mult = (params.reverse[0] ? -1 : 1);
582 caltinay 5120 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
583 caltinay 4618 const int y_mult = (params.reverse[1] ? -1 : 1);
584 caltinay 5120 const dim_t z0 = (params.reverse[2] ? num2-1 : 0);
585 caltinay 4618 const int z_mult = (params.reverse[2] ? -1 : 1);
586    
587 caltinay 4013 for (index_t z=0; z<num2; z++) {
588     for (index_t y=0; y<num1; y++) {
589     #pragma omp parallel for
590     for (index_t x=0; x<num0; x++) {
591 caltinay 5120 const dim_t baseIndex = first0+x*params.multiplier[0]
592 caltinay 4615 +(first1+y*params.multiplier[1])*myN0
593     +(first2+z*params.multiplier[2])*myN0*myN1;
594 caltinay 5120 const dim_t srcIndex=(z0+z_mult*z)*num1*num0
595 caltinay 4618 +(y0+y_mult*y)*num0
596     +(x0+x_mult*x);
597 jfenwick 5535 if (!bm::isnan(values[srcIndex])) {
598 caltinay 4615 for (index_t m2=0; m2<params.multiplier[2]; m2++) {
599     for (index_t m1=0; m1<params.multiplier[1]; m1++) {
600     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
601 caltinay 5120 const dim_t dataIndex = baseIndex+m0
602 caltinay 4277 +m1*myN0
603     +m2*myN0*myN1;
604     double* dest = out.getSampleDataRW(dataIndex);
605     for (index_t q=0; q<dpp; q++) {
606     *dest++ = values[srcIndex];
607     }
608     }
609     }
610 caltinay 4174 }
611 caltinay 4013 }
612     }
613     }
614     }
615     #else
616     throw RipleyException("readNcGrid(): not compiled with netCDF support");
617     #endif
618     }
619    
620 jfenwick 6505 #endif
621    
622 sshaw 4738 void Brick::readBinaryGridFromZipped(escript::Data& out, string filename,
623     const ReaderParameters& params) const
624     {
625 caltinay 6141 #ifdef ESYS_HAVE_BOOST_IO
626 sshaw 4738 // the mapping is not universally correct but should work on our
627     // supported platforms
628     switch (params.dataType) {
629     case DATATYPE_INT32:
630     readBinaryGridZippedImpl<int>(out, filename, params);
631     break;
632     case DATATYPE_FLOAT32:
633     readBinaryGridZippedImpl<float>(out, filename, params);
634     break;
635     case DATATYPE_FLOAT64:
636     readBinaryGridZippedImpl<double>(out, filename, params);
637     break;
638     default:
639 caltinay 6141 throw ValueError("readBinaryGridZipped(): invalid or unsupported datatype");
640 sshaw 4738 }
641 caltinay 6141 #else
642     throw RipleyException("readBinaryGridZipped(): not compiled with zip support");
643     #endif
644 sshaw 4738 }
645    
646 caltinay 4334 void Brick::readBinaryGrid(escript::Data& out, string filename,
647 caltinay 4618 const ReaderParameters& params) const
648 caltinay 4334 {
649 caltinay 4495 // the mapping is not universally correct but should work on our
650     // supported platforms
651 caltinay 4615 switch (params.dataType) {
652 caltinay 4495 case DATATYPE_INT32:
653 caltinay 4615 readBinaryGridImpl<int>(out, filename, params);
654 caltinay 4495 break;
655     case DATATYPE_FLOAT32:
656 caltinay 4615 readBinaryGridImpl<float>(out, filename, params);
657 caltinay 4495 break;
658     case DATATYPE_FLOAT64:
659 caltinay 4615 readBinaryGridImpl<double>(out, filename, params);
660 caltinay 4495 break;
661     default:
662 caltinay 5986 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
663 caltinay 4495 }
664     }
665    
666     template<typename ValueType>
667     void Brick::readBinaryGridImpl(escript::Data& out, const string& filename,
668 caltinay 4618 const ReaderParameters& params) const
669 caltinay 4495 {
670 caltinay 4334 // check destination function space
671 caltinay 5120 dim_t myN0, myN1, myN2;
672 caltinay 4334 if (out.getFunctionSpace().getTypeCode() == Nodes) {
673     myN0 = m_NN[0];
674     myN1 = m_NN[1];
675     myN2 = m_NN[2];
676     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
677     out.getFunctionSpace().getTypeCode() == ReducedElements) {
678     myN0 = m_NE[0];
679     myN1 = m_NE[1];
680     myN2 = m_NE[2];
681     } else
682 caltinay 5986 throw ValueError("readBinaryGrid(): invalid function space for output data object");
683 caltinay 4334
684 caltinay 4615 if (params.first.size() != 3)
685 caltinay 5986 throw ValueError("readBinaryGrid(): argument 'first' must have 3 entries");
686 caltinay 4334
687 caltinay 4615 if (params.numValues.size() != 3)
688 caltinay 5986 throw ValueError("readBinaryGrid(): argument 'numValues' must have 3 entries");
689 caltinay 4334
690 caltinay 4615 if (params.multiplier.size() != 3)
691 caltinay 5986 throw ValueError("readBinaryGrid(): argument 'multiplier' must have 3 entries");
692 caltinay 4615 for (size_t i=0; i<params.multiplier.size(); i++)
693     if (params.multiplier[i]<1)
694 caltinay 5986 throw ValueError("readBinaryGrid(): all multipliers must be positive");
695 caltinay 4978 if (params.reverse[0] != 0 || params.reverse[1] != 0)
696     throw RipleyException("readBinaryGrid(): reversing only supported in Z-direction currently");
697 caltinay 4334
698     // check file existence and size
699 jfenwick 5525 std::ifstream f(filename.c_str(), std::ifstream::binary);
700 caltinay 4334 if (f.fail()) {
701 caltinay 6161 throw RipleyException("readBinaryGrid(): cannot open file " + filename);
702 caltinay 4334 }
703 sshaw 5529 f.seekg(0, std::ios::end);
704 caltinay 4334 const int numComp = out.getDataPointSize();
705 caltinay 5120 const dim_t filesize = f.tellg();
706     const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
707 caltinay 4334 if (filesize < reqsize) {
708     f.close();
709     throw RipleyException("readBinaryGrid(): not enough data in file");
710     }
711    
712     // check if this rank contributes anything
713 caltinay 4615 if (params.first[0] >= m_offset[0]+myN0 ||
714     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
715     params.first[1] >= m_offset[1]+myN1 ||
716     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
717     params.first[2] >= m_offset[2]+myN2 ||
718     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
719 caltinay 4334 f.close();
720     return;
721     }
722    
723     // now determine how much this rank has to write
724    
725     // first coordinates in data object to write to
726 caltinay 5187 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
727     const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
728     const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
729 caltinay 4978 // indices to first value in file (not accounting for reverse yet)
730 caltinay 5187 dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
731     dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
732     dim_t idx2 = max(dim_t(0), (m_offset[2]/params.multiplier[2])-params.first[2]);
733 caltinay 5098 // if restX > 0 the first value in the respective dimension has been
734     // written restX times already in a previous rank so this rank only
735     // contributes (multiplier-rank) copies of that value
736 caltinay 5120 const dim_t rest0 = m_offset[0]%params.multiplier[0];
737     const dim_t rest1 = m_offset[1]%params.multiplier[1];
738     const dim_t rest2 = m_offset[2]%params.multiplier[2];
739 caltinay 5098
740 caltinay 4334 // number of values to read
741 caltinay 5120 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
742     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
743     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
744 caltinay 4334
745 caltinay 4978 // make sure we read the right block if going backwards through file
746     if (params.reverse[2])
747     idx2 = params.numValues[2]-idx2-1;
748    
749     // helpers for reversing
750     const int z_mult = (params.reverse[2] ? -1 : 1);
751    
752 caltinay 4334 out.requireWrite();
753 caltinay 4495 vector<ValueType> values(num0*numComp);
754 caltinay 4334 const int dpp = out.getNumDataPointsPerSample();
755    
756 caltinay 5120 for (dim_t z=0; z<num2; z++) {
757     const dim_t m2limit = (z==0 ? params.multiplier[2]-rest2 : params.multiplier[2]);
758     dim_t dataZbase = first2 + z*params.multiplier[2];
759 caltinay 5098 if (z>0)
760     dataZbase -= rest2;
761    
762 caltinay 5120 for (dim_t y=0; y<num1; y++) {
763     const dim_t fileofs = numComp*(idx0 +
764 caltinay 4978 (idx1+y)*params.numValues[0] +
765     (idx2+z_mult*z)*params.numValues[0]*params.numValues[1]);
766 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
767     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
768 caltinay 5120 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
769     dim_t dataYbase = first1 + y*params.multiplier[1];
770 caltinay 5098 if (y>0)
771     dataYbase -= rest1;
772 caltinay 4334
773 caltinay 5120 for (dim_t x=0; x<num0; x++) {
774     const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
775     dim_t dataXbase = first0 + x*params.multiplier[0];
776 caltinay 5098 if (x>0)
777     dataXbase -= rest0;
778     // write a block of mult0 x mult1 x mult2 identical values into
779     // Data object
780 caltinay 5120 for (dim_t m2=0; m2 < m2limit; m2++) {
781     const dim_t dataZ = dataZbase + m2;
782 caltinay 5098 if (dataZ >= myN2)
783     break;
784 caltinay 5120 for (dim_t m1=0; m1 < m1limit; m1++) {
785     const dim_t dataY = dataYbase + m1;
786 caltinay 5098 if (dataY >= myN1)
787     break;
788 caltinay 5120 for (dim_t m0=0; m0 < m0limit; m0++) {
789     const dim_t dataX = dataXbase + m0;
790 caltinay 5098 if (dataX >= myN0)
791     break;
792 caltinay 5120 const dim_t dataIndex = dataX + dataY*myN0 + dataZ*myN0*myN1;
793 caltinay 4334 double* dest = out.getSampleDataRW(dataIndex);
794 caltinay 4529 for (int c=0; c<numComp; c++) {
795     ValueType val = values[x*numComp+c];
796    
797 caltinay 4615 if (params.byteOrder != BYTEORDER_NATIVE) {
798 caltinay 4529 char* cval = reinterpret_cast<char*>(&val);
799     // this will alter val!!
800 caltinay 5051 if (sizeof(ValueType)>4) {
801     byte_swap64(cval);
802     } else {
803     byte_swap32(cval);
804     }
805 caltinay 4529 }
806 jfenwick 5535 if (!bm::isnan(val)) {
807 caltinay 4529 for (int q=0; q<dpp; q++) {
808     *dest++ = static_cast<double>(val);
809 caltinay 4334 }
810     }
811     }
812     }
813     }
814     }
815     }
816     }
817     }
818    
819     f.close();
820     }
821    
822 caltinay 6141 #ifdef ESYS_HAVE_BOOST_IO
823 sshaw 4738 template<typename ValueType>
824     void Brick::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
825     const ReaderParameters& params) const
826     {
827     // check destination function space
828 caltinay 5120 dim_t myN0, myN1, myN2;
829 sshaw 4738 if (out.getFunctionSpace().getTypeCode() == Nodes) {
830     myN0 = m_NN[0];
831     myN1 = m_NN[1];
832     myN2 = m_NN[2];
833     } else if (out.getFunctionSpace().getTypeCode() == Elements ||
834     out.getFunctionSpace().getTypeCode() == ReducedElements) {
835     myN0 = m_NE[0];
836     myN1 = m_NE[1];
837     myN2 = m_NE[2];
838     } else
839 caltinay 5986 throw ValueError("readBinaryGridFromZipped(): invalid function space for output data object");
840 sshaw 4738
841     if (params.first.size() != 3)
842 caltinay 5986 throw ValueError("readBinaryGridFromZipped(): argument 'first' must have 3 entries");
843 sshaw 4738
844     if (params.numValues.size() != 3)
845 caltinay 5986 throw ValueError("readBinaryGridFromZipped(): argument 'numValues' must have 3 entries");
846 sshaw 4738
847     if (params.multiplier.size() != 3)
848 caltinay 5986 throw ValueError("readBinaryGridFromZipped(): argument 'multiplier' must have 3 entries");
849 sshaw 4738 for (size_t i=0; i<params.multiplier.size(); i++)
850     if (params.multiplier[i]<1)
851 caltinay 5986 throw ValueError("readBinaryGridFromZipped(): all multipliers must be positive");
852 sshaw 4738
853     // check file existence and size
854 jfenwick 5525 std::ifstream f(filename.c_str(), std::ifstream::binary);
855 sshaw 4738 if (f.fail()) {
856 caltinay 6161 throw RipleyException("readBinaryGridFromZipped(): cannot open file " + filename);
857 sshaw 4738 }
858 sshaw 5529 f.seekg(0, std::ios::end);
859 sshaw 4738 const int numComp = out.getDataPointSize();
860 caltinay 5120 dim_t filesize = f.tellg();
861 sshaw 4738 f.seekg(0, ios::beg);
862 caltinay 5115 vector<char> compressed(filesize);
863 sshaw 4738 f.read((char*)&compressed[0], filesize);
864     f.close();
865 caltinay 5115 vector<char> decompressed = unzip(compressed);
866 sshaw 4738 filesize = decompressed.size();
867 caltinay 5120 const dim_t reqsize = params.numValues[0]*params.numValues[1]*params.numValues[2]*numComp*sizeof(ValueType);
868 sshaw 4738 if (filesize < reqsize) {
869     throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
870     }
871    
872     // check if this rank contributes anything
873     if (params.first[0] >= m_offset[0]+myN0 ||
874     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
875     params.first[1] >= m_offset[1]+myN1 ||
876     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1] ||
877     params.first[2] >= m_offset[2]+myN2 ||
878     params.first[2]+params.numValues[2]*params.multiplier[2] <= m_offset[2]) {
879     return;
880     }
881    
882     // now determine how much this rank has to write
883    
884     // first coordinates in data object to write to
885 caltinay 5187 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
886     const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
887     const dim_t first2 = max(dim_t(0), params.first[2]-m_offset[2]);
888 sshaw 4738 // indices to first value in file
889 caltinay 5187 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
890     const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
891     const dim_t idx2 = max(dim_t(0), m_offset[2]-params.first[2]);
892 sshaw 4738 // number of values to read
893 caltinay 5120 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
894     const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
895     const dim_t num2 = min(params.numValues[2]-idx2, myN2-first2);
896 sshaw 4738
897     out.requireWrite();
898     vector<ValueType> values(num0*numComp);
899     const int dpp = out.getNumDataPointsPerSample();
900    
901 caltinay 5120 for (dim_t z=0; z<num2; z++) {
902     for (dim_t y=0; y<num1; y++) {
903     const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]
904 sshaw 4738 +(idx2+z)*params.numValues[0]*params.numValues[1]);
905     memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
906 caltinay 5119
907 caltinay 5120 for (dim_t x=0; x<num0; x++) {
908     const dim_t baseIndex = first0+x*params.multiplier[0]
909 sshaw 4738 +(first1+y*params.multiplier[1])*myN0
910     +(first2+z*params.multiplier[2])*myN0*myN1;
911 caltinay 5120 for (dim_t m2=0; m2<params.multiplier[2]; m2++) {
912     for (dim_t m1=0; m1<params.multiplier[1]; m1++) {
913     for (dim_t m0=0; m0<params.multiplier[0]; m0++) {
914     const dim_t dataIndex = baseIndex+m0
915 sshaw 4738 +m1*myN0
916     +m2*myN0*myN1;
917     double* dest = out.getSampleDataRW(dataIndex);
918     for (int c=0; c<numComp; c++) {
919     ValueType val = values[x*numComp+c];
920    
921     if (params.byteOrder != BYTEORDER_NATIVE) {
922     char* cval = reinterpret_cast<char*>(&val);
923     // this will alter val!!
924     byte_swap32(cval);
925     }
926 jfenwick 5535 if (!bm::isnan(val)) {
927 sshaw 4738 for (int q=0; q<dpp; q++) {
928     *dest++ = static_cast<double>(val);
929     }
930     }
931     }
932     }
933     }
934     }
935     }
936     }
937     }
938     }
939     #endif
940    
941 caltinay 4357 void Brick::writeBinaryGrid(const escript::Data& in, string filename,
942     int byteOrder, int dataType) const
943 caltinay 4334 {
944 caltinay 4357 // the mapping is not universally correct but should work on our
945     // supported platforms
946     switch (dataType) {
947     case DATATYPE_INT32:
948     writeBinaryGridImpl<int>(in, filename, byteOrder);
949     break;
950     case DATATYPE_FLOAT32:
951     writeBinaryGridImpl<float>(in, filename, byteOrder);
952     break;
953     case DATATYPE_FLOAT64:
954     writeBinaryGridImpl<double>(in, filename, byteOrder);
955     break;
956     default:
957 caltinay 5986 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
958 caltinay 4357 }
959     }
960    
961     template<typename ValueType>
962     void Brick::writeBinaryGridImpl(const escript::Data& in,
963     const string& filename, int byteOrder) const
964     {
965 caltinay 4334 // check function space and determine number of points
966 caltinay 5120 dim_t myN0, myN1, myN2;
967     dim_t totalN0, totalN1, totalN2;
968 caltinay 5172 dim_t offset0, offset1, offset2;
969 caltinay 4334 if (in.getFunctionSpace().getTypeCode() == Nodes) {
970     myN0 = m_NN[0];
971     myN1 = m_NN[1];
972     myN2 = m_NN[2];
973     totalN0 = m_gNE[0]+1;
974     totalN1 = m_gNE[1]+1;
975     totalN2 = m_gNE[2]+1;
976 caltinay 5172 offset0 = m_offset[0];
977     offset1 = m_offset[1];
978     offset2 = m_offset[2];
979 caltinay 5205 } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
980     in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
981 caltinay 5172 myN0 = (m_gNE[0]+1)/m_NX[0];
982     myN1 = (m_gNE[1]+1)/m_NX[1];
983     myN2 = (m_gNE[2]+1)/m_NX[2];
984     totalN0 = m_gNE[0]+1;
985     totalN1 = m_gNE[1]+1;
986     totalN2 = m_gNE[2]+1;
987     offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
988     offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
989     offset2 = (m_offset[2]>0 ? m_offset[2]+1 : 0);
990 caltinay 4334 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
991     in.getFunctionSpace().getTypeCode() == ReducedElements) {
992     myN0 = m_NE[0];
993     myN1 = m_NE[1];
994     myN2 = m_NE[2];
995     totalN0 = m_gNE[0];
996     totalN1 = m_gNE[1];
997     totalN2 = m_gNE[2];
998 caltinay 5172 offset0 = m_offset[0];
999     offset1 = m_offset[1];
1000     offset2 = m_offset[2];
1001 caltinay 4334 } else
1002 caltinay 5172 throw RipleyException("writeBinaryGrid(): unsupported function space");
1003 caltinay 4334
1004     const int numComp = in.getDataPointSize();
1005     const int dpp = in.getNumDataPointsPerSample();
1006 caltinay 5120 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1*totalN2;
1007 caltinay 4334
1008     if (numComp > 1 || dpp > 1)
1009     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
1010    
1011     // from here on we know that each sample consists of one value
1012 caltinay 5804 FileWriter fw(m_mpiInfo->comm);
1013     fw.openFile(filename, fileSize, true);
1014 caltinay 4334 MPIBarrier();
1015    
1016     for (index_t z=0; z<myN2; z++) {
1017     for (index_t y=0; y<myN1; y++) {
1018 caltinay 5172 const dim_t fileofs = (offset0+(offset1+y)*totalN0
1019     +(offset2+z)*totalN0*totalN1)*sizeof(ValueType);
1020 jfenwick 5525 std::ostringstream oss;
1021 caltinay 4334
1022     for (index_t x=0; x<myN0; x++) {
1023 caltinay 4626 const double* sample = in.getSampleDataRO(z*myN0*myN1+y*myN0+x);
1024 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
1025     if (byteOrder == BYTEORDER_NATIVE) {
1026 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
1027     } else {
1028     char* value = reinterpret_cast<char*>(&fvalue);
1029 caltinay 5015 if (sizeof(fvalue)>4) {
1030     byte_swap64(value);
1031     } else {
1032     byte_swap32(value);
1033     }
1034     oss.write(value, sizeof(fvalue));
1035 caltinay 4334 }
1036     }
1037 caltinay 4482 fw.writeAt(oss, fileofs);
1038 caltinay 4334 }
1039     }
1040 caltinay 4482 fw.close();
1041 caltinay 4334 }
1042    
1043 caltinay 5121 void Brick::write(const std::string& filename) const
1044     {
1045     throw RipleyException("write: not supported");
1046     }
1047    
1048 caltinay 3691 void Brick::dump(const string& fileName) const
1049     {
1050 caltinay 6144 #ifdef ESYS_HAVE_SILO
1051 caltinay 3691 string fn(fileName);
1052     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
1053     fn+=".silo";
1054     }
1055    
1056 caltinay 5119 int driver=DB_HDF5;
1057 caltinay 3691 string siloPath;
1058     DBfile* dbfile = NULL;
1059    
1060     #ifdef ESYS_MPI
1061     PMPIO_baton_t* baton = NULL;
1062 gross 3793 const int NUM_SILO_FILES = 1;
1063     const char* blockDirFmt = "/block%04d";
1064 caltinay 3691 #endif
1065    
1066     if (m_mpiInfo->size > 1) {
1067     #ifdef ESYS_MPI
1068     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
1069     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
1070     PMPIO_DefaultClose, (void*)&driver);
1071     // try the fallback driver in case of error
1072     if (!baton && driver != DB_PDB) {
1073     driver = DB_PDB;
1074     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
1075     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
1076     PMPIO_DefaultClose, (void*)&driver);
1077     }
1078     if (baton) {
1079     char str[64];
1080     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
1081     siloPath = str;
1082     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
1083     }
1084     #endif
1085     } else {
1086     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
1087     getDescription().c_str(), driver);
1088     // try the fallback driver in case of error
1089     if (!dbfile && driver != DB_PDB) {
1090     driver = DB_PDB;
1091     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
1092     getDescription().c_str(), driver);
1093     }
1094     }
1095    
1096     if (!dbfile)
1097     throw RipleyException("dump: Could not create Silo file");
1098    
1099     /*
1100     if (driver==DB_HDF5) {
1101     // gzip level 1 already provides good compression with minimal
1102     // performance penalty. Some tests showed that gzip levels >3 performed
1103     // rather badly on escript data both in terms of time and space
1104     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
1105     }
1106     */
1107    
1108 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
1109     boost::scoped_ptr<double> y(new double[m_NN[1]]);
1110     boost::scoped_ptr<double> z(new double[m_NN[2]]);
1111 caltinay 3691 double* coords[3] = { x.get(), y.get(), z.get() };
1112 caltinay 5120 const dim_t NN0 = m_NN[0];
1113     const dim_t NN1 = m_NN[1];
1114     const dim_t NN2 = m_NN[2];
1115 caltinay 5036
1116 caltinay 3691 #pragma omp parallel
1117     {
1118     #pragma omp for
1119 caltinay 5036 for (dim_t i0 = 0; i0 < NN0; i0++) {
1120 caltinay 4334 coords[0][i0]=getLocalCoordinate(i0, 0);
1121 caltinay 3691 }
1122     #pragma omp for
1123 caltinay 5036 for (dim_t i1 = 0; i1 < NN1; i1++) {
1124 caltinay 4334 coords[1][i1]=getLocalCoordinate(i1, 1);
1125 caltinay 3691 }
1126     #pragma omp for
1127 caltinay 5036 for (dim_t i2 = 0; i2 < NN2; i2++) {
1128 caltinay 4334 coords[2][i2]=getLocalCoordinate(i2, 2);
1129 caltinay 3691 }
1130     }
1131 caltinay 5187 // converting to int!
1132     vector<int> dims(m_NN, m_NN+3);
1133 caltinay 4334
1134     // write mesh
1135 caltinay 5187 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
1136 caltinay 3691 DB_COLLINEAR, NULL);
1137    
1138 caltinay 4334 // write node ids
1139 caltinay 5187 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
1140 caltinay 3698 NULL, 0, DB_INT, DB_NODECENT, NULL);
1141 caltinay 3691
1142 caltinay 3698 // write element ids
1143 caltinay 5187 dims.assign(m_NE, m_NE+3);
1144 caltinay 3698 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
1145 caltinay 5187 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
1146 caltinay 3698
1147     // rank 0 writes multimesh and multivar
1148 caltinay 3691 if (m_mpiInfo->rank == 0) {
1149     vector<string> tempstrings;
1150     vector<char*> names;
1151     for (dim_t i=0; i<m_mpiInfo->size; i++) {
1152 sshaw 5529 std::stringstream path;
1153     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
1154 caltinay 3691 tempstrings.push_back(path.str());
1155     names.push_back((char*)tempstrings.back().c_str());
1156     }
1157     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
1158     DBSetDir(dbfile, "/");
1159     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
1160     &types[0], NULL);
1161     tempstrings.clear();
1162     names.clear();
1163     for (dim_t i=0; i<m_mpiInfo->size; i++) {
1164 sshaw 5529 std::stringstream path;
1165     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
1166 caltinay 3691 tempstrings.push_back(path.str());
1167     names.push_back((char*)tempstrings.back().c_str());
1168     }
1169     types.assign(m_mpiInfo->size, DB_QUADVAR);
1170     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
1171     &types[0], NULL);
1172 caltinay 3698 tempstrings.clear();
1173     names.clear();
1174     for (dim_t i=0; i<m_mpiInfo->size; i++) {
1175 sshaw 5529 std::stringstream path;
1176     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
1177 caltinay 3698 tempstrings.push_back(path.str());
1178     names.push_back((char*)tempstrings.back().c_str());
1179     }
1180     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
1181     &types[0], NULL);
1182 caltinay 3691 }
1183    
1184     if (m_mpiInfo->size > 1) {
1185     #ifdef ESYS_MPI
1186     PMPIO_HandOffBaton(baton, dbfile);
1187     PMPIO_Finish(baton);
1188     #endif
1189     } else {
1190     DBClose(dbfile);
1191     }
1192    
1193 caltinay 6144 #else // ESYS_HAVE_SILO
1194 caltinay 3791 throw RipleyException("dump: no Silo support");
1195 caltinay 3691 #endif
1196     }
1197    
1198 caltinay 5120 const dim_t* Brick::borrowSampleReferenceIDs(int fsType) const
1199 caltinay 3691 {
1200 caltinay 3697 switch (fsType) {
1201     case Nodes:
1202 caltinay 3748 case ReducedNodes: //FIXME: reduced
1203 caltinay 3753 return &m_nodeId[0];
1204 caltinay 3757 case DegreesOfFreedom:
1205     case ReducedDegreesOfFreedom: //FIXME: reduced
1206 caltinay 3753 return &m_dofId[0];
1207 caltinay 3697 case Elements:
1208 caltinay 3733 case ReducedElements:
1209 caltinay 3697 return &m_elementId[0];
1210 caltinay 3757 case FaceElements:
1211 caltinay 3733 case ReducedFaceElements:
1212 caltinay 3697 return &m_faceId[0];
1213 sshaw 4660 case Points:
1214     return &m_diracPointNodeIDs[0];
1215 caltinay 3697 default:
1216     break;
1217     }
1218 caltinay 3691
1219 sshaw 5529 std::stringstream msg;
1220 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type "<<fsType;
1221 caltinay 5986 throw ValueError(msg.str());
1222 caltinay 3691 }
1223    
1224 caltinay 3757 bool Brick::ownSample(int fsType, index_t id) const
1225 caltinay 3691 {
1226 caltinay 3759 if (getMPISize()==1)
1227     return true;
1228    
1229 caltinay 3757 switch (fsType) {
1230     case Nodes:
1231     case ReducedNodes: //FIXME: reduced
1232     return (m_dofMap[id] < getNumDOF());
1233     case DegreesOfFreedom:
1234     case ReducedDegreesOfFreedom:
1235     return true;
1236     case Elements:
1237     case ReducedElements:
1238     {
1239     // check ownership of element's _last_ node
1240 caltinay 4334 const index_t x=id%m_NE[0] + 1;
1241     const index_t y=id%(m_NE[0]*m_NE[1])/m_NE[0] + 1;
1242     const index_t z=id/(m_NE[0]*m_NE[1]) + 1;
1243     return (m_dofMap[x + m_NN[0]*y + m_NN[0]*m_NN[1]*z] < getNumDOF());
1244 caltinay 3757 }
1245     case FaceElements:
1246     case ReducedFaceElements:
1247 caltinay 3759 {
1248     // check ownership of face element's last node
1249     dim_t n=0;
1250 caltinay 4334 for (size_t i=0; i<6; i++) {
1251     n+=m_faceCount[i];
1252 caltinay 3759 if (id<n) {
1253 caltinay 4334 const index_t j=id-n+m_faceCount[i];
1254 caltinay 3759 if (i>=4) { // front or back
1255 caltinay 4334 const index_t first=(i==4 ? 0 : m_NN[0]*m_NN[1]*(m_NN[2]-1));
1256     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]] < getNumDOF());
1257 caltinay 3759 } else if (i>=2) { // bottom or top
1258 caltinay 4334 const index_t first=(i==2 ? 0 : m_NN[0]*(m_NN[1]-1));
1259     return (m_dofMap[first+j%m_NE[0]+1+(j/m_NE[0]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1260 caltinay 3759 } else { // left or right
1261 caltinay 4334 const index_t first=(i==0 ? 0 : m_NN[0]-1);
1262     return (m_dofMap[first+(j%m_NE[1]+1)*m_NN[0]+(j/m_NE[1]+1)*m_NN[0]*m_NN[1]] < getNumDOF());
1263 caltinay 3759 }
1264     }
1265     }
1266     return false;
1267     }
1268 caltinay 3757 default:
1269     break;
1270 caltinay 3753 }
1271 caltinay 3757
1272 sshaw 5529 std::stringstream msg;
1273 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
1274 caltinay 5986 throw ValueError(msg.str());
1275 caltinay 3691 }
1276    
1277 caltinay 5532 RankVector Brick::getOwnerVector(int fsType) const
1278     {
1279     RankVector owner;
1280 caltinay 5965 const int rank = m_mpiInfo->rank;
1281 caltinay 5532
1282     if (fsType == Elements || fsType == ReducedElements) {
1283     owner.assign(getNumElements(), rank);
1284     // shared plane in Y-Z
1285     if (m_faceCount[0]==0) {
1286     for (dim_t k2=0; k2<m_NE[2]; k2++) {
1287     for (dim_t k1=0; k1<m_NE[1]; k1++) {
1288     const dim_t e=k2*m_NE[0]*m_NE[1]+k1*m_NE[0];
1289     owner[e] = rank-1;
1290     }
1291     }
1292     }
1293     // shared plane in X-Z
1294     if (m_faceCount[2]==0) {
1295     for (dim_t k2=0; k2<m_NE[2]; k2++) {
1296     for (dim_t k0=0; k0<m_NE[0]; k0++) {
1297     const dim_t e=k2*m_NE[0]*m_NE[1]+k0;
1298     owner[e] = rank-m_NX[0];
1299     }
1300     }
1301     }
1302     // shared plane in X-Y
1303     if (m_faceCount[4]==0) {
1304     for (dim_t k1=0; k1<m_NE[1]; k1++) {
1305     for (dim_t k0=0; k0<m_NE[0]; k0++) {
1306     const dim_t e=k1*m_NE[0]+k0;
1307     owner[e] = rank-m_NX[0]*m_NX[1];
1308     }
1309     }
1310     }
1311    
1312     } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
1313     owner.assign(getNumFaceElements(), rank);
1314     index_t offset=0;
1315     if (m_faceCount[0] > 0) {
1316     if (m_faceCount[2]==0) {
1317     for (dim_t k2=0; k2<m_NE[2]; k2++)
1318     owner[k2*m_NE[1]] = rank-m_NX[0];
1319     }
1320     if (m_faceCount[4]==0) {
1321     for (dim_t k1=0; k1<m_NE[1]; k1++)
1322     owner[k1] = rank-m_NX[0]*m_NX[1];
1323     }
1324     offset += m_faceCount[0];
1325     }
1326     if (m_faceCount[1] > 0) {
1327     if (m_faceCount[2]==0) {
1328     for (dim_t k2=0; k2<m_NE[2]; k2++)
1329     owner[offset+k2*m_NE[1]] = rank-m_NX[0];
1330     }
1331     if (m_faceCount[4]==0) {
1332     for (dim_t k1=0; k1<m_NE[1]; k1++)
1333     owner[offset+k1] = rank-m_NX[0]*m_NX[1];
1334     }
1335     offset += m_faceCount[1];
1336     }
1337     if (m_faceCount[2] > 0) {
1338     if (m_faceCount[0]==0) {
1339     for (dim_t k2=0; k2<m_NE[2]; k2++)
1340     owner[offset+k2*m_NE[0]] = rank-1;
1341     }
1342     if (m_faceCount[4]==0) {
1343     for (dim_t k0=0; k0<m_NE[0]; k0++)
1344     owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1345     }
1346     offset += m_faceCount[2];
1347     }
1348     if (m_faceCount[3] > 0) {
1349     if (m_faceCount[0]==0) {
1350     for (dim_t k2=0; k2<m_NE[2]; k2++)
1351     owner[offset+k2*m_NE[0]] = rank-1;
1352     }
1353     if (m_faceCount[4]==0) {
1354     for (dim_t k0=0; k0<m_NE[0]; k0++)
1355     owner[offset+k0] = rank-m_NX[0]*m_NX[1];
1356     }
1357     offset += m_faceCount[3];
1358     }
1359     if (m_faceCount[4] > 0) {
1360     if (m_faceCount[0]==0) {
1361     for (dim_t k1=0; k1<m_NE[1]; k1++)
1362     owner[offset+k1*m_NE[0]] = rank-1;
1363     }
1364     if (m_faceCount[2]==0) {
1365     for (dim_t k0=0; k0<m_NE[0]; k0++)
1366     owner[offset+k0] = rank-m_NX[0];
1367     }
1368     offset += m_faceCount[4];
1369     }
1370     if (m_faceCount[5] > 0) {
1371     if (m_faceCount[0]==0) {
1372     for (dim_t k1=0; k1<m_NE[1]; k1++)
1373     owner[offset+k1*m_NE[0]] = rank-1;
1374     }
1375     if (m_faceCount[2]==0) {
1376     for (dim_t k0=0; k0<m_NE[0]; k0++)
1377     owner[offset+k0] = rank-m_NX[0];
1378     }
1379     }
1380     } else {
1381 caltinay 5986 throw ValueError("getOwnerVector: only valid for element types");
1382 caltinay 5532 }
1383    
1384     return owner;
1385     }
1386    
1387 caltinay 3764 void Brick::setToNormal(escript::Data& out) const
1388 caltinay 3703 {
1389 caltinay 5120 const dim_t NE0 = m_NE[0];
1390     const dim_t NE1 = m_NE[1];
1391     const dim_t NE2 = m_NE[2];
1392 caltinay 5036
1393 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1394     out.requireWrite();
1395     #pragma omp parallel
1396     {
1397     if (m_faceOffset[0] > -1) {
1398     #pragma omp for nowait
1399 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1400     for (index_t k1 = 0; k1 < NE1; ++k1) {
1401 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1402 caltinay 3764 // set vector at four quadrature points
1403     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1404     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1405     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1406     *o++ = -1.; *o++ = 0.; *o = 0.;
1407     }
1408     }
1409     }
1410    
1411     if (m_faceOffset[1] > -1) {
1412     #pragma omp for nowait
1413 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1414     for (index_t k1 = 0; k1 < NE1; ++k1) {
1415 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1416 caltinay 3764 // set vector at four quadrature points
1417     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1418     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1419     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1420     *o++ = 1.; *o++ = 0.; *o = 0.;
1421     }
1422     }
1423     }
1424    
1425     if (m_faceOffset[2] > -1) {
1426     #pragma omp for nowait
1427 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1428     for (index_t k0 = 0; k0 < NE0; ++k0) {
1429 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1430 caltinay 3764 // set vector at four quadrature points
1431     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1432     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1433     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1434     *o++ = 0.; *o++ = -1.; *o = 0.;
1435     }
1436     }
1437     }
1438    
1439     if (m_faceOffset[3] > -1) {
1440     #pragma omp for nowait
1441 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1442     for (index_t k0 = 0; k0 < NE0; ++k0) {
1443 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1444 caltinay 3764 // set vector at four quadrature points
1445     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1446     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1447     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1448     *o++ = 0.; *o++ = 1.; *o = 0.;
1449     }
1450     }
1451     }
1452    
1453     if (m_faceOffset[4] > -1) {
1454     #pragma omp for nowait
1455 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1456     for (index_t k0 = 0; k0 < NE0; ++k0) {
1457 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1458 caltinay 3764 // set vector at four quadrature points
1459     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1460     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1461     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1462     *o++ = 0.; *o++ = 0.; *o = -1.;
1463     }
1464     }
1465     }
1466    
1467     if (m_faceOffset[5] > -1) {
1468     #pragma omp for nowait
1469 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1470     for (index_t k0 = 0; k0 < NE0; ++k0) {
1471 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1472 caltinay 3764 // set vector at four quadrature points
1473     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1474     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1475     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1476     *o++ = 0.; *o++ = 0.; *o = 1.;
1477     }
1478     }
1479     }
1480     } // end of parallel section
1481     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1482     out.requireWrite();
1483     #pragma omp parallel
1484     {
1485     if (m_faceOffset[0] > -1) {
1486     #pragma omp for nowait
1487 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1488     for (index_t k1 = 0; k1 < NE1; ++k1) {
1489 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1490 caltinay 3764 *o++ = -1.;
1491     *o++ = 0.;
1492     *o = 0.;
1493     }
1494     }
1495     }
1496    
1497     if (m_faceOffset[1] > -1) {
1498     #pragma omp for nowait
1499 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1500     for (index_t k1 = 0; k1 < NE1; ++k1) {
1501 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1502 caltinay 3764 *o++ = 1.;
1503     *o++ = 0.;
1504     *o = 0.;
1505     }
1506     }
1507     }
1508    
1509     if (m_faceOffset[2] > -1) {
1510     #pragma omp for nowait
1511 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1512     for (index_t k0 = 0; k0 < NE0; ++k0) {
1513 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1514 caltinay 3764 *o++ = 0.;
1515     *o++ = -1.;
1516     *o = 0.;
1517     }
1518     }
1519     }
1520    
1521     if (m_faceOffset[3] > -1) {
1522     #pragma omp for nowait
1523 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1524     for (index_t k0 = 0; k0 < NE0; ++k0) {
1525 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1526 caltinay 3764 *o++ = 0.;
1527     *o++ = 1.;
1528     *o = 0.;
1529     }
1530     }
1531     }
1532    
1533     if (m_faceOffset[4] > -1) {
1534     #pragma omp for nowait
1535 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1536     for (index_t k0 = 0; k0 < NE0; ++k0) {
1537 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1538 caltinay 3764 *o++ = 0.;
1539     *o++ = 0.;
1540     *o = -1.;
1541     }
1542     }
1543     }
1544    
1545     if (m_faceOffset[5] > -1) {
1546     #pragma omp for nowait
1547 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1548     for (index_t k0 = 0; k0 < NE0; ++k0) {
1549 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1550 caltinay 3764 *o++ = 0.;
1551     *o++ = 0.;
1552     *o = 1.;
1553     }
1554     }
1555     }
1556     } // end of parallel section
1557    
1558     } else {
1559 sshaw 5529 std::stringstream msg;
1560 caltinay 3791 msg << "setToNormal: invalid function space type "
1561     << out.getFunctionSpace().getTypeCode();
1562 caltinay 5986 throw ValueError(msg.str());
1563 caltinay 3764 }
1564     }
1565    
1566     void Brick::setToSize(escript::Data& out) const
1567     {
1568     if (out.getFunctionSpace().getTypeCode() == Elements
1569     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1570     out.requireWrite();
1571     const dim_t numQuad=out.getNumDataPointsPerSample();
1572 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]+m_dx[2]*m_dx[2]);
1573 caltinay 5120 const dim_t NE = getNumElements();
1574 caltinay 3764 #pragma omp parallel for
1575 caltinay 5036 for (index_t k = 0; k < NE; ++k) {
1576 caltinay 3764 double* o = out.getSampleDataRW(k);
1577     fill(o, o+numQuad, size);
1578     }
1579     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1580     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1581     out.requireWrite();
1582     const dim_t numQuad=out.getNumDataPointsPerSample();
1583 caltinay 5120 const dim_t NE0 = m_NE[0];
1584     const dim_t NE1 = m_NE[1];
1585     const dim_t NE2 = m_NE[2];
1586 caltinay 3764 #pragma omp parallel
1587     {
1588     if (m_faceOffset[0] > -1) {
1589 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1590 caltinay 3764 #pragma omp for nowait
1591 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1592     for (index_t k1 = 0; k1 < NE1; ++k1) {
1593 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE[1]));
1594 caltinay 3764 fill(o, o+numQuad, size);
1595     }
1596     }
1597     }
1598    
1599     if (m_faceOffset[1] > -1) {
1600 caltinay 4334 const double size=min(m_dx[1],m_dx[2]);
1601 caltinay 3764 #pragma omp for nowait
1602 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1603     for (index_t k1 = 0; k1 < NE1; ++k1) {
1604 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE[1]));
1605 caltinay 3764 fill(o, o+numQuad, size);
1606     }
1607     }
1608     }
1609    
1610     if (m_faceOffset[2] > -1) {
1611 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1612 caltinay 3764 #pragma omp for nowait
1613 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1614     for (index_t k0 = 0; k0 < NE0; ++k0) {
1615 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE[0]));
1616 caltinay 3764 fill(o, o+numQuad, size);
1617     }
1618     }
1619     }
1620    
1621     if (m_faceOffset[3] > -1) {
1622 caltinay 4334 const double size=min(m_dx[0],m_dx[2]);
1623 caltinay 3764 #pragma omp for nowait
1624 caltinay 5036 for (index_t k2 = 0; k2 < NE2; ++k2) {
1625     for (index_t k0 = 0; k0 < NE0; ++k0) {
1626 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE[0]));
1627 caltinay 3764 fill(o, o+numQuad, size);
1628     }
1629     }
1630     }
1631    
1632     if (m_faceOffset[4] > -1) {
1633 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1634 caltinay 3764 #pragma omp for nowait
1635 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1636     for (index_t k0 = 0; k0 < NE0; ++k0) {
1637 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE[0]));
1638 caltinay 3764 fill(o, o+numQuad, size);
1639     }
1640     }
1641     }
1642    
1643     if (m_faceOffset[5] > -1) {
1644 caltinay 4334 const double size=min(m_dx[0],m_dx[1]);
1645 caltinay 3764 #pragma omp for nowait
1646 caltinay 5036 for (index_t k1 = 0; k1 < NE1; ++k1) {
1647     for (index_t k0 = 0; k0 < NE0; ++k0) {
1648 caltinay 4334 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE[0]));
1649 caltinay 3764 fill(o, o+numQuad, size);
1650     }
1651     }
1652     }
1653     } // end of parallel section
1654    
1655     } else {
1656 sshaw 5529 std::stringstream msg;
1657 caltinay 3791 msg << "setToSize: invalid function space type "
1658     << out.getFunctionSpace().getTypeCode();
1659 caltinay 5986 throw ValueError(msg.str());
1660 caltinay 3764 }
1661     }
1662    
1663     void Brick::Print_Mesh_Info(const bool full) const
1664     {
1665     RipleyDomain::Print_Mesh_Info(full);
1666     if (full) {
1667 jfenwick 5525 std::cout << " Id Coordinates" << std::endl;
1668     std::cout.precision(15);
1669 sshaw 5529 std::cout.setf(ios::scientific, std::ios::floatfield);
1670 caltinay 3764 for (index_t i=0; i < getNumNodes(); i++) {
1671 jfenwick 5525 std::cout << " " << std::setw(5) << m_nodeId[i]
1672 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
1673     << " " << getLocalCoordinate(i%(m_NN[0]*m_NN[1])/m_NN[0], 1)
1674 jfenwick 5525 << " " << getLocalCoordinate(i/(m_NN[0]*m_NN[1]), 2) << std::endl;
1675 caltinay 3764 }
1676     }
1677     }
1678    
1679    
1680     //protected
1681     void Brick::assembleCoordinates(escript::Data& arg) const
1682     {
1683     int numDim = m_numDim;
1684 sshaw 5775 if (!arg.isDataPointShapeEqual(1, &numDim))
1685 caltinay 5986 throw ValueError("setToX: Invalid Data object shape");
1686 sshaw 5775 if (!arg.numSamplesEqual(1, getNumNodes()))
1687 caltinay 5986 throw ValueError("setToX: Illegal number of samples in Data object");
1688 caltinay 3764
1689 caltinay 5120 const dim_t NN0 = m_NN[0];
1690     const dim_t NN1 = m_NN[1];
1691     const dim_t NN2 = m_NN[2];
1692 caltinay 3764 arg.requireWrite();
1693     #pragma omp parallel for
1694 caltinay 5036 for (dim_t i2 = 0; i2 < NN2; i2++) {
1695     for (dim_t i1 = 0; i1 < NN1; i1++) {
1696     for (dim_t i0 = 0; i0 < NN0; i0++) {
1697     double* point = arg.getSampleDataRW(i0+NN0*i1+NN0*NN1*i2);
1698 caltinay 4334 point[0] = getLocalCoordinate(i0, 0);
1699     point[1] = getLocalCoordinate(i1, 1);
1700     point[2] = getLocalCoordinate(i2, 2);
1701 caltinay 3764 }
1702     }
1703     }
1704     }
1705    
1706     //protected
1707 caltinay 6429 void Brick::assembleGradient(escript::Data& out,
1708     const escript::Data& in) const
1709 caltinay 3764 {
1710 caltinay 6429 if (out.isComplex() != in.isComplex())
1711     throw ValueError("Gradient: input & output complexity must match.");
1712     else if (in.isComplex())
1713     assembleGradientImpl<cplx_t>(out, in);
1714     else
1715     assembleGradientImpl<real_t>(out, in);
1716     }
1717    
1718     //protected
1719     template<typename Scalar>
1720     void Brick::assembleGradientImpl(escript::Data& out,
1721     const escript::Data& in) const
1722     {
1723 caltinay 3703 const dim_t numComp = in.getDataPointSize();
1724 caltinay 3731 const double C0 = .044658198738520451079;
1725     const double C1 = .16666666666666666667;
1726     const double C2 = .21132486540518711775;
1727     const double C3 = .25;
1728     const double C4 = .5;
1729     const double C5 = .62200846792814621559;
1730     const double C6 = .78867513459481288225;
1731 caltinay 5120 const dim_t NE0 = m_NE[0];
1732     const dim_t NE1 = m_NE[1];
1733     const dim_t NE2 = m_NE[2];
1734 caltinay 6429 const Scalar zero = static_cast<Scalar>(0);
1735 caltinay 3731
1736 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
1737 caltinay 3760 out.requireWrite();
1738 caltinay 3913 #pragma omp parallel
1739     {
1740 caltinay 6429 vector<Scalar> f_000(numComp, zero);
1741     vector<Scalar> f_001(numComp, zero);
1742     vector<Scalar> f_010(numComp, zero);
1743     vector<Scalar> f_011(numComp, zero);
1744     vector<Scalar> f_100(numComp, zero);
1745     vector<Scalar> f_101(numComp, zero);
1746     vector<Scalar> f_110(numComp, zero);
1747     vector<Scalar> f_111(numComp, zero);
1748 caltinay 3913 #pragma omp for
1749 caltinay 5036 for (index_t k2=0; k2 < NE2; ++k2) {
1750     for (index_t k1=0; k1 < NE1; ++k1) {
1751     for (index_t k0=0; k0 < NE0; ++k0) {
1752 caltinay 6429 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1753     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1754     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1755     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1756     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1757     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1758     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1759     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1760     Scalar* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1), zero);
1761 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1762 caltinay 6429 const Scalar V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1763     const Scalar V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1764     const Scalar V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / m_dx[0];
1765     const Scalar V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / m_dx[0];
1766     const Scalar V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1767     const Scalar V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1768     const Scalar V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / m_dx[1];
1769     const Scalar V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / m_dx[1];
1770     const Scalar V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1771     const Scalar V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1772     const Scalar V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / m_dx[2];
1773     const Scalar V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / m_dx[2];
1774 caltinay 3913 o[INDEX3(i,0,0,numComp,3)] = V0;
1775     o[INDEX3(i,1,0,numComp,3)] = V4;
1776     o[INDEX3(i,2,0,numComp,3)] = V8;
1777     o[INDEX3(i,0,1,numComp,3)] = V0;
1778     o[INDEX3(i,1,1,numComp,3)] = V5;
1779     o[INDEX3(i,2,1,numComp,3)] = V9;
1780     o[INDEX3(i,0,2,numComp,3)] = V1;
1781     o[INDEX3(i,1,2,numComp,3)] = V4;
1782     o[INDEX3(i,2,2,numComp,3)] = V10;
1783     o[INDEX3(i,0,3,numComp,3)] = V1;
1784     o[INDEX3(i,1,3,numComp,3)] = V5;
1785     o[INDEX3(i,2,3,numComp,3)] = V11;
1786     o[INDEX3(i,0,4,numComp,3)] = V2;
1787     o[INDEX3(i,1,4,numComp,3)] = V6;
1788     o[INDEX3(i,2,4,numComp,3)] = V8;
1789     o[INDEX3(i,0,5,numComp,3)] = V2;
1790     o[INDEX3(i,1,5,numComp,3)] = V7;
1791     o[INDEX3(i,2,5,numComp,3)] = V9;
1792     o[INDEX3(i,0,6,numComp,3)] = V3;
1793     o[INDEX3(i,1,6,numComp,3)] = V6;
1794     o[INDEX3(i,2,6,numComp,3)] = V10;
1795     o[INDEX3(i,0,7,numComp,3)] = V3;
1796     o[INDEX3(i,1,7,numComp,3)] = V7;
1797     o[INDEX3(i,2,7,numComp,3)] = V11;
1798     } // end of component loop i
1799     } // end of k0 loop
1800     } // end of k1 loop
1801     } // end of k2 loop
1802     } // end of parallel section
1803 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1804 caltinay 3760 out.requireWrite();
1805 caltinay 3913 #pragma omp parallel
1806     {
1807 caltinay 6429 vector<Scalar> f_000(numComp, zero);
1808     vector<Scalar> f_001(numComp, zero);
1809     vector<Scalar> f_010(numComp, zero);
1810     vector<Scalar> f_011(numComp, zero);
1811     vector<Scalar> f_100(numComp, zero);
1812     vector<Scalar> f_101(numComp, zero);
1813     vector<Scalar> f_110(numComp, zero);
1814     vector<Scalar> f_111(numComp, zero);
1815 caltinay 3913 #pragma omp for
1816 caltinay 5036 for (index_t k2=0; k2 < NE2; ++k2) {
1817     for (index_t k1=0; k1 < NE1; ++k1) {
1818     for (index_t k0=0; k0 < NE0; ++k0) {
1819 caltinay 6429 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(k0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1820     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1821     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1822     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1823     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1824     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1825     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1826     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1827     Scalar* o = out.getSampleDataRW(INDEX3(k0,k1,k2,NE0,NE1), zero);
1828 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1829 caltinay 4375 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / m_dx[0];
1830     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / m_dx[1];
1831     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / m_dx[2];
1832 caltinay 3913 } // end of component loop i
1833     } // end of k0 loop
1834     } // end of k1 loop
1835     } // end of k2 loop
1836     } // end of parallel section
1837 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1838 caltinay 3760 out.requireWrite();
1839 caltinay 3722 #pragma omp parallel
1840     {
1841 caltinay 6429 vector<Scalar> f_000(numComp, zero);
1842     vector<Scalar> f_001(numComp, zero);
1843     vector<Scalar> f_010(numComp, zero);
1844     vector<Scalar> f_011(numComp, zero);
1845     vector<Scalar> f_100(numComp, zero);
1846     vector<Scalar> f_101(numComp, zero);
1847     vector<Scalar> f_110(numComp, zero);
1848     vector<Scalar> f_111(numComp, zero);
1849 caltinay 3722 if (m_faceOffset[0] > -1) {
1850     #pragma omp for nowait
1851 caltinay 5036 for (index_t k2=0; k2 < NE2; ++k2) {
1852     for (index_t k1=0; k1 < NE1; ++k1) {
1853 caltinay 6429 memcpy(&f_000[0], in.getSampleDataRO(INDEX3(0,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1854     memcpy(&f_001[0], in.getSampleDataRO(INDEX3(0,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1855     memcpy(&f_010[0], in.getSampleDataRO(INDEX3(0,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1856     memcpy(&f_011[0], in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1857     memcpy(&f_100[0], in.getSampleDataRO(INDEX3(1,k1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1858     memcpy(&f_101[0], in.getSampleDataRO(INDEX3(1,k1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1859     memcpy(&f_110[0], in.getSampleDataRO(INDEX3(1,k1+1,k2, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1860     memcpy(&f_111[0], in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_NN[0],m_NN[1]), zero), numComp*sizeof(Scalar));
1861     Scalar* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,NE1), zero);
1862 caltinay 3722