/[escript]/trunk/speckley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/speckley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6141 - (hide annotations)
Wed Apr 6 03:51:30 2016 UTC (2 years, 11 months ago) by caltinay
File size: 59160 byte(s)
more namespacing of defines.

1 sshaw 5123
2     /*****************************************************************************
3     *
4 jfenwick 5863 * Copyright (c) 2003-2016 by The University of Queensland
5 sshaw 5123 * http://www.uq.edu.au
6     *
7     * Primary Business: Queensland, Australia
8 jfenwick 6112 * Licensed under the Apache License, version 2.0
9     * http://www.apache.org/licenses/LICENSE-2.0
10 sshaw 5123 *
11     * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14     *
15     *****************************************************************************/
16    
17     #include <speckley/Rectangle.h>
18     #include <speckley/DefaultAssembler2D.h>
19 sshaw 5574 #include <speckley/WaveAssembler2D.h>
20 sshaw 5226 #ifdef USE_RIPLEY
21     #include <speckley/CrossDomainCoupler.h>
22     #endif
23    
24 caltinay 5998 #include <escript/index.h>
25 caltinay 5968 #include <escript/FileWriter.h>
26 caltinay 5958 #include <escript/FunctionSpaceFactory.h>
27     #include <escript/Random.h>
28    
29     #include <boost/scoped_array.hpp>
30     #include <boost/math/special_functions/fpclassify.hpp> // for isnan
31    
32 sshaw 5123 #ifdef USE_NETCDF
33     #include <netcdfcpp.h>
34     #endif
35    
36     #if USE_SILO
37     #include <silo.h>
38     #ifdef ESYS_MPI
39     #include <pmpio.h>
40     #endif
41     #endif
42    
43 caltinay 5958 #include <algorithm>
44 sshaw 5123 #include <iomanip>
45 caltinay 5958 #include <limits>
46 sshaw 5123
47 jfenwick 5535 namespace bm=boost::math;
48 caltinay 5968 using escript::FileWriter;
49 jfenwick 5525
50 sshaw 5123 namespace speckley {
51    
52     Rectangle::Rectangle(int order, dim_t n0, dim_t n1, double x0, double y0, double x1,
53     double y1, int d0, int d1,
54     const std::vector<double>& points,
55     const std::vector<int>& tags,
56 sshaw 5177 const TagMap& tagnamestonums,
57     escript::SubWorld_ptr w) :
58 sshaw 5123 SpeckleyDomain(2, order, w)
59     {
60     if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
61     > std::numeric_limits<dim_t>::max())
62     throw SpeckleyException("The number of elements has overflowed, this "
63     "limit may be raised in future releases.");
64    
65     if (n0 <= 0 || n1 <= 0)
66     throw SpeckleyException("Number of elements in each spatial dimension "
67     "must be positive");
68    
69     // ignore subdivision parameters for serial run
70     if (m_mpiInfo->size == 1) {
71     d0=1;
72     d1=1;
73     }
74    
75     bool warn=false;
76     std::vector<int> factors;
77     int ranks = m_mpiInfo->size;
78     dim_t epr[2] = {n0,n1};
79     int d[2] = {d0,d1};
80     if (d0<=0 || d1<=0) {
81     for (int i = 0; i < 2; i++) {
82     if (d[i] < 1) {
83     d[i] = 1;
84     continue;
85     }
86     epr[i] = -1; // can no longer be max
87     //remove
88     if (ranks % d[i] != 0) {
89     throw SpeckleyException("Invalid number of spatial subdivisions");
90     }
91     ranks /= d[i];
92     }
93     factorise(factors, ranks);
94     if (factors.size() != 0) {
95     warn = true;
96     }
97     }
98    
99     while (factors.size() > 0) {
100     int i = epr[0] > epr[1] ? 0 : 1;
101     int f = factors.back();
102     factors.pop_back();
103     d[i] *= f;
104     epr[i] /= f;
105     }
106     d0 = d[0]; d1 = d[1];
107    
108     // ensure number of subdivisions is valid and nodes can be distributed
109     // among number of ranks
110     if (d0*d1 != m_mpiInfo->size)
111     throw SpeckleyException("Invalid number of spatial subdivisions");
112    
113     if (warn) {
114     std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
115     << d1 << "). This may not be optimal!" << std::endl;
116     }
117    
118     double l0 = x1-x0;
119     double l1 = y1-y0;
120     m_dx[0] = l0/n0;
121     m_dx[1] = l1/n1;
122    
123     if (n0 % d0 > 0) {
124     n0 += d0 - (n0 % d0);
125     l0 = m_dx[0]*n0;
126     std::cout << "Warning: Adjusted number of elements and length. N0="
127     << n0 << ", l0=" << l0 << std::endl;
128     }
129     if (n1 % d1 > 0) {
130     n1 += d1 - (n1 % d1);
131     l1 = m_dx[1]*n1;
132     std::cout << "Warning: Adjusted number of elements and length. N1="
133     << n1 << ", l1=" << l1 << std::endl;
134     }
135    
136     if (n0/d0 < 2 || n1/d1 < 2)
137     throw SpeckleyException("Too few elements for the number of ranks");
138    
139     m_gNE[0] = n0;
140     m_gNE[1] = n1;
141     m_origin[0] = x0;
142     m_origin[1] = y0;
143     m_length[0] = l0;
144     m_length[1] = l1;
145     m_NX[0] = d0;
146     m_NX[1] = d1;
147    
148     // local number of elements
149     m_NE[0] = m_gNE[0] / d0;
150     m_NE[1] = m_gNE[1] / d1;
151    
152     // local number of nodes
153     m_NN[0] = m_NE[0]*m_order+1;
154     m_NN[1] = m_NE[1]*m_order+1;
155    
156     // bottom-left node is at (offset0,offset1) in global mesh
157     m_offset[0] = n0/d0*(m_mpiInfo->rank%d0);
158     m_offset[1] = n1/d1*(m_mpiInfo->rank/d0);
159    
160     populateSampleIds();
161    
162 sshaw 5177 for (TagMap::const_iterator i = tagnamestonums.begin();
163 sshaw 5123 i != tagnamestonums.end(); i++) {
164     setTagMap(i->first, i->second);
165     }
166     addPoints(points, tags);
167 sshaw 5226
168    
169     #ifdef USE_RIPLEY
170     coupler = NULL;
171     #endif
172 sshaw 5123 }
173    
174     Rectangle::~Rectangle()
175     {
176 sshaw 5226 #ifdef USE_RIPLEY
177 sshaw 5246 if (coupler != NULL)
178     delete coupler;
179 sshaw 5226 #endif
180 sshaw 5123 }
181    
182     std::string Rectangle::getDescription() const
183     {
184     return "speckley::Rectangle";
185     }
186    
187 sshaw 5736 bool Rectangle::operator==(const escript::AbstractDomain& other) const
188 sshaw 5123 {
189     const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
190     if (o) {
191     return (SpeckleyDomain::operator==(other) &&
192     m_order == o->m_order &&
193     m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
194     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
195     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
196     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
197     }
198    
199     return false;
200     }
201    
202 sshaw 5191 void Rectangle::readNcGrid(escript::Data& out, std::string filename,
203 sshaw 5149 std::string varname, const ReaderParameters& params) const
204     {
205     #ifdef USE_NETCDF
206     // check destination function space
207     dim_t myN0, myN1;
208     if (out.getFunctionSpace().getTypeCode() == Nodes) {
209     myN0 = m_NE[0] + 1;
210     myN1 = m_NE[1] + 1;
211     // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
212     // myN0 = m_NE[0];
213     // myN1 = m_NE[1];
214     } else
215     throw SpeckleyException("readNcGrid(): invalid function space for output data object");
216    
217     if (params.first.size() != 2)
218     throw SpeckleyException("readNcGrid(): argument 'first' must have 2 entries");
219    
220     if (params.numValues.size() != 2)
221     throw SpeckleyException("readNcGrid(): argument 'numValues' must have 2 entries");
222    
223     if (params.multiplier.size() != 2)
224     throw SpeckleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
225     for (size_t i=0; i<params.multiplier.size(); i++)
226     if (params.multiplier[i]<1)
227     throw SpeckleyException("readNcGrid(): all multipliers must be positive");
228     if (params.reverse.size() != 2)
229     throw SpeckleyException("readNcGrid(): argument 'reverse' must have 2 entries");
230    
231     // check file existence and size
232     NcFile f(filename.c_str(), NcFile::ReadOnly);
233     if (!f.is_valid())
234     throw SpeckleyException("readNcGrid(): cannot open file");
235    
236     NcVar* var = f.get_var(varname.c_str());
237     if (!var)
238     throw SpeckleyException("readNcGrid(): invalid variable");
239    
240     // TODO: rank>0 data support
241     const int numComp = out.getDataPointSize();
242     if (numComp > 1)
243     throw SpeckleyException("readNcGrid(): only scalar data supported");
244    
245     const int dims = var->num_dims();
246     boost::scoped_array<long> edges(var->edges());
247    
248     // is this a slice of the data object (dims!=2)?
249     // note the expected ordering of edges (as in numpy: y,x)
250     if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
251     || (dims==1 && params.numValues[1]>1) ) {
252     throw SpeckleyException("readNcGrid(): not enough data in file");
253     }
254    
255     // check if this rank contributes anything
256     if (params.first[0] >= m_offset[0]+myN0 ||
257     params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
258     params.first[1] >= m_offset[1]+myN1 ||
259     params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
260     return;
261    
262     // now determine how much this rank has to write
263    
264     // first coordinates in data object to write to
265 caltinay 5194 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
266     const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
267 sshaw 5149 // indices to first value in file (not accounting for reverse yet)
268 caltinay 5194 dim_t idx0 = std::max(dim_t(0), m_offset[0]-params.first[0]);
269     dim_t idx1 = std::max(dim_t(0), m_offset[1]-params.first[1]);
270 sshaw 5149 // number of values to read
271     const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
272     const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
273    
274     // make sure we read the right block if going backwards through file
275     if (params.reverse[0])
276     idx0 = edges[dims-1]-num0-idx0;
277     if (dims>1 && params.reverse[1])
278     idx1 = edges[dims-2]-num1-idx1;
279    
280     std::vector<double> values(num0*num1);
281     if (dims==2) {
282     var->set_cur(idx1, idx0);
283     var->get(&values[0], num1, num0);
284     } else {
285     var->set_cur(idx0);
286     var->get(&values[0], num0);
287     }
288    
289     const int dpp = out.getNumDataPointsPerSample();
290     out.requireWrite();
291    
292     // helpers for reversing
293     const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
294     const int x_mult = (params.reverse[0] ? -1 : 1);
295     const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
296     const int y_mult = (params.reverse[1] ? -1 : 1);
297    
298     for (index_t y=0; y<num1; y++) {
299     #pragma omp parallel for
300     for (index_t x=0; x<num0; x++) {
301     const dim_t baseIndex = first0+x*params.multiplier[0]
302     +(first1+y*params.multiplier[1])*myN0;
303     const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
304 jfenwick 5535 if (!bm::isnan(values[srcIndex])) {
305 sshaw 5149 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
306     for (index_t m0=0; m0<params.multiplier[0]; m0++) {
307     const dim_t dataIndex = baseIndex+m0+m1*myN0;
308     double* dest = out.getSampleDataRW(dataIndex);
309     for (index_t q=0; q<dpp; q++) {
310     *dest++ = values[srcIndex];
311     }
312     }
313     }
314     }
315     }
316     }
317     #else
318     throw SpeckleyException("readNcGrid(): not compiled with netCDF support");
319     #endif
320     }
321    
322     void Rectangle::readBinaryGrid(escript::Data& out, std::string filename,
323     const ReaderParameters& params) const
324     {
325     // the mapping is not universally correct but should work on our
326     // supported platforms
327     switch (params.dataType) {
328     case DATATYPE_INT32:
329     readBinaryGridImpl<int>(out, filename, params);
330     break;
331     case DATATYPE_FLOAT32:
332     readBinaryGridImpl<float>(out, filename, params);
333     break;
334     case DATATYPE_FLOAT64:
335     readBinaryGridImpl<double>(out, filename, params);
336     break;
337     default:
338     throw SpeckleyException("readBinaryGrid(): invalid or unsupported datatype");
339     }
340     }
341    
342     void Rectangle::readBinaryGridFromZipped(escript::Data& out, std::string filename,
343     const ReaderParameters& params) const
344     {
345 caltinay 6141 #ifdef ESYS_HAVE_BOOST_IO
346 sshaw 5149 // the mapping is not universally correct but should work on our
347     // supported platforms
348     switch (params.dataType) {
349     case DATATYPE_INT32:
350     readBinaryGridZippedImpl<int>(out, filename, params);
351     break;
352     case DATATYPE_FLOAT32:
353     readBinaryGridZippedImpl<float>(out, filename, params);
354     break;
355     case DATATYPE_FLOAT64:
356     readBinaryGridZippedImpl<double>(out, filename, params);
357     break;
358     default:
359     throw SpeckleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
360     }
361 caltinay 6141 #else
362     throw SpeckleyException("readBinaryGridFromZipped(): not built with zip support");
363     #endif
364 sshaw 5149 }
365    
366     template<typename ValueType>
367     void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filename,
368     const ReaderParameters& params) const
369     {
370     // check destination function space
371     dim_t myN0, myN1;
372     if (out.getFunctionSpace().getTypeCode() == Nodes) {
373     myN0 = m_NE[0] + 1;
374     myN1 = m_NE[1] + 1;
375     // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
376     // myN0 = m_NE[0];
377     // myN1 = m_NE[1];
378     } else
379     throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
380    
381     if (params.first.size() != 2)
382     throw SpeckleyException("readBinaryGrid(): argument 'first' must have 2 entries");
383    
384     if (params.numValues.size() != 2)
385     throw SpeckleyException("readBinaryGrid(): argument 'numValues' must have 2 entries");
386    
387     if (params.multiplier.size() != 2)
388     throw SpeckleyException("readBinaryGrid(): argument 'multiplier' must have 2 entries");
389     for (size_t i=0; i<params.multiplier.size(); i++)
390     if (params.multiplier[i]<1)
391     throw SpeckleyException("readBinaryGrid(): all multipliers must be positive");
392     if (params.reverse[0] != 0 || params.reverse[1] != 0)
393     throw SpeckleyException("readBinaryGrid(): reversing not supported yet");
394    
395     // check file existence and size
396     std::ifstream f(filename.c_str(), std::ifstream::binary);
397     if (f.fail()) {
398     throw SpeckleyException("readBinaryGrid(): cannot open file");
399     }
400     f.seekg(0, std::ios::end);
401     const int numComp = out.getDataPointSize();
402     const dim_t filesize = f.tellg();
403     const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
404     if (filesize < reqsize) {
405     f.close();
406     throw SpeckleyException("readBinaryGrid(): not enough data in file");
407     }
408    
409     // check if this rank contributes anything
410     if (params.first[0] >= m_offset[0]+myN0 ||
411     params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
412     params.first[1] >= m_offset[1]+myN1 ||
413     params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
414     f.close();
415     return;
416     }
417    
418     // now determine how much this rank has to write
419    
420     // first coordinates in data object to write to
421 caltinay 5194 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
422     const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
423 sshaw 5149 // indices to first value in file
424 caltinay 5194 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
425     const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
426 sshaw 5149 // if restX > 0 the first value in the respective dimension has been
427     // written restX times already in a previous rank so this rank only
428     // contributes (multiplier-rank) copies of that value
429     const dim_t rest0 = m_offset[0]%params.multiplier[0];
430     const dim_t rest1 = m_offset[1]%params.multiplier[1];
431     // number of values to read
432     const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
433     const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
434    
435     out.requireWrite();
436     std::vector<ValueType> values(num0*numComp);
437     const int dpp = out.getNumDataPointsPerSample();
438    
439     for (dim_t y=0; y<num1; y++) {
440     const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
441     f.seekg(fileofs*sizeof(ValueType));
442     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
443     const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
444     dim_t dataYbase = first1+y*params.multiplier[1];
445     if (y>0)
446     dataYbase -= rest1;
447     for (dim_t x=0; x<num0; x++) {
448     const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
449     dim_t dataXbase = first0+x*params.multiplier[0];
450     if (x>0)
451     dataXbase -= rest0;
452     // write a block of mult0 x mult1 identical values into Data object
453     for (dim_t m1=0; m1<m1limit; m1++) {
454     const dim_t dataY = dataYbase+m1;
455     if (dataY >= myN1)
456     break;
457     for (dim_t m0=0; m0<m0limit; m0++) {
458     const dim_t dataX = dataXbase+m0;
459     if (dataX >= myN0)
460     break;
461     const dim_t dataIndex = dataX+dataY*m_NN[0];
462     double* dest = out.getSampleDataRW(dataIndex*m_order);
463     for (int c=0; c<numComp; c++) {
464     ValueType val = values[x*numComp+c];
465     if (params.byteOrder != BYTEORDER_NATIVE) {
466     char* cval = reinterpret_cast<char*>(&val);
467     // this will alter val!!
468     if (sizeof(ValueType) > 4) {
469     byte_swap64(cval);
470     } else {
471     byte_swap32(cval);
472     }
473     }
474 jfenwick 5535 if (!bm::isnan(val)) {
475 sshaw 5149 for (int q=0; q<dpp; q++) {
476     *dest++ = static_cast<double>(val);
477     }
478     }
479     }
480     }
481     }
482     }
483     }
484    
485     f.close();
486 sshaw 5154 interpolateFromCorners(out);
487 sshaw 5149 }
488    
489 caltinay 6141 #ifdef ESYS_HAVE_BOOST_IO
490 sshaw 5149 template<typename ValueType>
491     void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const std::string& filename,
492     const ReaderParameters& params) const
493     {
494     // check destination function space
495     dim_t myN0, myN1;
496     if (out.getFunctionSpace().getTypeCode() == Nodes) {
497     myN0 = m_NE[0] + 1;
498     myN1 = m_NE[1] + 1;
499     // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
500     // myN0 = m_NE[0];
501     // myN1 = m_NE[1];
502     } else
503     throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
504    
505     // check file existence and size
506     std::ifstream f(filename.c_str(), std::ifstream::binary);
507     if (f.fail()) {
508     throw SpeckleyException(strerror(errno));//"readBinaryGridFromZipped(): cannot open file");
509     }
510     f.seekg(0, std::ios::end);
511     const int numComp = out.getDataPointSize();
512     dim_t filesize = f.tellg();
513     f.seekg(0, std::ios::beg);
514     std::vector<char> compressed(filesize);
515     f.read((char*)&compressed[0], filesize);
516     f.close();
517     std::vector<char> decompressed = unzip(compressed);
518     filesize = decompressed.size();
519     const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
520     if (filesize < reqsize) {
521     throw SpeckleyException("readBinaryGridFromZipped(): not enough data in file");
522     }
523    
524     // check if this rank contributes anything
525     if (params.first[0] >= m_offset[0]+myN0 ||
526     params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
527     params.first[1] >= m_offset[1]+myN1 ||
528     params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
529     return;
530     }
531    
532     // now determine how much this rank has to write
533    
534     // first coordinates in data object to write to
535 caltinay 5194 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
536     const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
537 sshaw 5149 // indices to first value in file
538 caltinay 5194 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
539     const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
540 sshaw 5149 // if restX > 0 the first value in the respective dimension has been
541     // written restX times already in a previous rank so this rank only
542     // contributes (multiplier-rank) copies of that value
543     const dim_t rest0 = m_offset[0]%params.multiplier[0];
544     const dim_t rest1 = m_offset[1]%params.multiplier[1];
545     // number of values to read
546     const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
547     const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
548    
549     out.requireWrite();
550     std::vector<ValueType> values(num0*numComp);
551     const int dpp = out.getNumDataPointsPerSample();
552    
553     for (dim_t y=0; y<num1; y++) {
554     const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
555 sshaw 5150 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
556 sshaw 5149 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
557     dim_t dataYbase = first1+y*params.multiplier[1];
558     if (y>0)
559     dataYbase -= rest1;
560     for (dim_t x=0; x<num0; x++) {
561     const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
562     dim_t dataXbase = first0+x*params.multiplier[0];
563     if (x>0)
564     dataXbase -= rest0;
565     // write a block of mult0 x mult1 identical values into Data object
566     for (dim_t m1=0; m1<m1limit; m1++) {
567     const dim_t dataY = dataYbase+m1;
568     if (dataY >= myN1)
569     break;
570     for (dim_t m0=0; m0<m0limit; m0++) {
571     const dim_t dataX = dataXbase+m0;
572     if (dataX >= myN0)
573     break;
574     const dim_t dataIndex = dataX+dataY*m_NN[0];
575     double* dest = out.getSampleDataRW(dataIndex*m_order);
576     for (int c=0; c<numComp; c++) {
577     ValueType val = values[x*numComp+c];
578    
579     if (params.byteOrder != BYTEORDER_NATIVE) {
580     char* cval = reinterpret_cast<char*>(&val);
581     // this will alter val!!
582     if (sizeof(ValueType) > 4) {
583     byte_swap64(cval);
584     } else {
585     byte_swap32(cval);
586     }
587     }
588 jfenwick 5535 if (!bm::isnan(val)) {
589 sshaw 5149 for (int q=0; q<dpp; q++) {
590     *dest++ = static_cast<double>(val);
591     }
592     }
593     }
594     }
595     }
596     }
597     }
598 sshaw 5154 interpolateFromCorners(out);
599 sshaw 5149 }
600     #endif
601    
602 sshaw 5154 void Rectangle::interpolateFromCorners(escript::Data &out) const
603     {
604     const int numComp = out.getDataPointSize();
605     //interpolate the missing portions
606 sshaw 5158 #pragma omp parallel for
607 sshaw 5154 for (dim_t y = 0; y < m_NN[1]; y++) {
608     for (dim_t x = 0; x < m_NN[0]; x++) {
609     //skip the points we have values for
610     if (y % m_order == 0 && x % m_order == 0)
611     continue;
612     //point location in element: x,y
613     const double px = point_locations[m_order-2][x%m_order];
614     const double py = point_locations[m_order-2][y%m_order];
615    
616     double *point = out.getSampleDataRW(INDEX2(x, y, m_NN[0]));
617    
618     const dim_t left = x - x%m_order;
619     const dim_t right = left < m_NN[0] - 1 ? left + m_order : left;
620     const dim_t front = y - y%m_order;
621     const dim_t back = front < m_NN[1] - 1 ? front + m_order : front;
622    
623    
624     const double *lowleft = out.getSampleDataRO(
625     INDEX2(left, front, m_NN[0]));
626     const double *lowright = out.getSampleDataRO(
627     INDEX2(right, front, m_NN[0]));
628     const double *highleft = out.getSampleDataRO(
629     INDEX2(left, back, m_NN[0]));
630     const double *highright = out.getSampleDataRO(
631     INDEX2(right, back, m_NN[0]));
632    
633     for (int comp = 0; comp < numComp; comp++) {
634 sshaw 5191 point[comp] = highright[comp]*px*py
635 sshaw 5154 + highleft[comp]*(1-px)*py
636     + lowright[comp]*px*(1-py)
637     + lowleft[comp]*(1-px)*(1-py);
638     }
639     }
640     }
641     }
642    
643 sshaw 5149 void Rectangle::writeBinaryGrid(const escript::Data& in, std::string filename,
644     int byteOrder, int dataType) const
645     {
646     // the mapping is not universally correct but should work on our
647     // supported platforms
648     switch (dataType) {
649     case DATATYPE_INT32:
650     writeBinaryGridImpl<int>(in, filename, byteOrder);
651     break;
652     case DATATYPE_FLOAT32:
653     writeBinaryGridImpl<float>(in, filename, byteOrder);
654     break;
655     case DATATYPE_FLOAT64:
656     writeBinaryGridImpl<double>(in, filename, byteOrder);
657     break;
658     default:
659     throw SpeckleyException("writeBinaryGrid(): invalid or unsupported datatype");
660     }
661     }
662    
663     template<typename ValueType>
664     void Rectangle::writeBinaryGridImpl(const escript::Data& in,
665     const std::string& filename, int byteOrder) const
666     {
667     // check function space and determine number of points
668     dim_t myN0, myN1;
669     dim_t totalN0, totalN1;
670     if (in.getFunctionSpace().getTypeCode() == Nodes) {
671     myN0 = m_NE[0]+1;
672     myN1 = m_NE[1]+1;
673     totalN0 = m_gNE[0]+1;
674     totalN1 = m_gNE[1]+1;
675     // } else if (in.getFunctionSpace().getTypeCode() == Elements) {
676     // myN0 = m_NE[0];
677     // myN1 = m_NE[1];
678     // totalN0 = m_gNE[0];
679     // totalN1 = m_gNE[1];
680     } else
681     throw SpeckleyException("writeBinaryGrid(): invalid function space of data object");
682    
683     const int numComp = in.getDataPointSize();
684     const int dpp = in.getNumDataPointsPerSample();
685    
686     if (numComp > 1 || dpp > 1)
687     throw SpeckleyException("writeBinaryGrid(): only scalar, single-value data supported");
688    
689     const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
690    
691     // from here on we know that each sample consists of one value
692     FileWriter fw;
693     fw.openFile(filename, fileSize);
694     MPIBarrier();
695    
696 sshaw 5177 for (index_t y=0; y<myN1; y++) {
697 sshaw 5149 const dim_t fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
698     std::ostringstream oss;
699    
700 sshaw 5177 for (index_t x=0; x<myN0; x++) {
701 sshaw 5149 const double* sample = in.getSampleDataRO((y*m_NN[0]+x)*m_order);
702     ValueType fvalue = static_cast<ValueType>(*sample);
703     if (byteOrder == BYTEORDER_NATIVE) {
704     oss.write((char*)&fvalue, sizeof(fvalue));
705     } else {
706     char* value = reinterpret_cast<char*>(&fvalue);
707     if (sizeof(fvalue)>4) {
708     byte_swap64(value);
709     } else {
710     byte_swap32(value);
711     }
712     oss.write(value, sizeof(fvalue));
713     }
714     }
715     fw.writeAt(oss, fileofs);
716     }
717     fw.close();
718     }
719    
720     void Rectangle::write(const std::string& filename) const
721     {
722     throw SpeckleyException("write: not supported");
723     }
724    
725     void Rectangle::dump(const std::string& fileName) const
726     {
727     #if USE_SILO
728     std::string fn(fileName);
729     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
730     fn+=".silo";
731     }
732    
733     int driver=DB_HDF5;
734     DBfile* dbfile = NULL;
735     const char* blockDirFmt = "/block%04d";
736    
737     #ifdef ESYS_MPI
738     PMPIO_baton_t* baton = NULL;
739     const int NUM_SILO_FILES = 1;
740     #endif
741    
742     if (m_mpiInfo->size > 1) {
743     #ifdef ESYS_MPI
744     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
745     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
746     PMPIO_DefaultClose, (void*)&driver);
747     // try the fallback driver in case of error
748     if (!baton && driver != DB_PDB) {
749     driver = DB_PDB;
750     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
751     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
752     PMPIO_DefaultClose, (void*)&driver);
753     }
754     if (baton) {
755     char siloPath[64];
756     snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
757     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
758     }
759     #endif
760     } else {
761     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
762     getDescription().c_str(), driver);
763     // try the fallback driver in case of error
764     if (!dbfile && driver != DB_PDB) {
765     driver = DB_PDB;
766     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
767     getDescription().c_str(), driver);
768     }
769     char siloPath[64];
770     snprintf(siloPath, 64, blockDirFmt, 0);
771     DBMkDir(dbfile, siloPath);
772     DBSetDir(dbfile, siloPath);
773     }
774    
775     if (!dbfile)
776     throw SpeckleyException("dump: Could not create Silo file");
777    
778     /*
779     if (driver==DB_HDF5) {
780     // gzip level 1 already provides good compression with minimal
781     // performance penalty. Some tests showed that gzip levels >3 performed
782     // rather badly on escript data both in terms of time and space
783     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
784     }
785     */
786    
787     const dim_t NN0 = m_NN[0];
788     const dim_t NN1 = m_NN[1];
789     boost::scoped_ptr<double> x(new double[NN0]);
790     boost::scoped_ptr<double> y(new double[NN1]);
791     double* coords[2] = { x.get(), y.get() };
792     #pragma omp parallel
793     {
794     #pragma omp for nowait
795     for (dim_t i0 = 0; i0 < NN0; i0++) {
796     coords[0][i0]=getLocalCoordinate(i0, 0);
797     }
798     #pragma omp for nowait
799     for (dim_t i1 = 0; i1 < NN1; i1++) {
800     coords[1][i1]=getLocalCoordinate(i1, 1);
801     }
802     }
803 caltinay 5194 std::vector<int> dims(m_NN, m_NN+2);
804 sshaw 5149
805     // write mesh
806 caltinay 5194 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
807 sshaw 5149 DB_COLLINEAR, NULL);
808    
809     // write node ids
810 caltinay 5194 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
811 sshaw 5149 NULL, 0, DB_INT, DB_NODECENT, NULL);
812    
813     // write element ids
814 caltinay 5194 dims.assign(m_NE, m_NE+2);
815 sshaw 5149 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
816 caltinay 5194 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
817 sshaw 5149
818     // rank 0 writes multimesh and multivar
819     if (m_mpiInfo->rank == 0) {
820     std::vector<std::string> tempstrings;
821     std::vector<char*> names;
822     for (dim_t i=0; i<m_mpiInfo->size; i++) {
823     std::stringstream path;
824     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
825     tempstrings.push_back(path.str());
826     names.push_back((char*)tempstrings.back().c_str());
827     }
828     std::vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
829     DBSetDir(dbfile, "/");
830     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
831     &types[0], NULL);
832     tempstrings.clear();
833     names.clear();
834     for (dim_t i=0; i<m_mpiInfo->size; i++) {
835     std::stringstream path;
836     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
837     tempstrings.push_back(path.str());
838     names.push_back((char*)tempstrings.back().c_str());
839     }
840     types.assign(m_mpiInfo->size, DB_QUADVAR);
841     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
842     &types[0], NULL);
843     tempstrings.clear();
844     names.clear();
845     for (dim_t i=0; i<m_mpiInfo->size; i++) {
846     std::stringstream path;
847     path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
848     tempstrings.push_back(path.str());
849     names.push_back((char*)tempstrings.back().c_str());
850     }
851     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
852     &types[0], NULL);
853     }
854    
855     if (m_mpiInfo->size > 1) {
856     #ifdef ESYS_MPI
857     PMPIO_HandOffBaton(baton, dbfile);
858     PMPIO_Finish(baton);
859     #endif
860     } else {
861     DBClose(dbfile);
862     }
863    
864     #else // USE_SILO
865     throw SpeckleyException("dump: no Silo support");
866     #endif
867     }
868    
869 sshaw 5123 const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
870     {
871     switch (fsType) {
872     case DegreesOfFreedom:
873     case Nodes:
874     return &m_nodeId[0];
875     case Elements:
876 sshaw 5478 case ReducedElements:
877 sshaw 5123 return &m_elementId[0];
878     case Points:
879     return &m_diracPointNodeIDs[0];
880     default:
881     break;
882     }
883    
884     std::stringstream msg;
885     msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
886     throw SpeckleyException(msg.str());
887     }
888    
889     bool Rectangle::ownSample(int fsType, index_t id) const
890     {
891     throw SpeckleyException("ownSample not implemented");
892     }
893    
894     void Rectangle::setToNormal(escript::Data& out) const
895     {
896     throw SpeckleyException("setToNormal not implemented");
897     }
898    
899     void Rectangle::setToSize(escript::Data& out) const
900     {
901     if (out.getFunctionSpace().getTypeCode() == Elements) {
902     out.requireWrite();
903 sshaw 5574 const dim_t numQuad = m_order + 1;
904 sshaw 5123 const dim_t numElements = getNumElements();
905 sshaw 5574 const double *quad_locs = point_locations[m_order-2];
906     //since elements are uniform, calc the first and copy to others
907     double* first_element = out.getSampleDataRW(0);
908 sshaw 5123 #pragma omp parallel for
909 sshaw 5574 for (short qy = 0; qy < m_order; qy++) {
910     const double y = quad_locs[qy+1] - quad_locs[qy];
911     for (short qx = 0; qx < m_order; qx++) {
912     const double x = quad_locs[qx+1] - quad_locs[qx];
913     first_element[qx + qy*numQuad] = sqrt(x*x + y*y);
914     }
915     }
916     const short top_start = (numQuad - 1)*numQuad;
917     for (short edge = 0; edge < m_order; edge++) {
918     //right edge = left edge
919     first_element[(edge+1)*numQuad - 1] = first_element[edge*numQuad];
920     //top edge = bottom edge
921     first_element[top_start + edge] = first_element[edge];
922     }
923     //top-right corner
924     first_element[numQuad*numQuad - 1] = first_element[0];
925     const size_t size = numQuad*numQuad*sizeof(double);
926     #pragma omp parallel for
927 sshaw 5123 for (index_t k = 0; k < numElements; ++k) {
928     double* o = out.getSampleDataRW(k);
929 sshaw 5574 memcpy(o, first_element, size);
930 sshaw 5123 }
931     } else {
932     std::stringstream msg;
933     msg << "setToSize: invalid function space type "
934     << out.getFunctionSpace().getTypeCode();
935     throw SpeckleyException(msg.str());
936     }
937     }
938    
939     void Rectangle::Print_Mesh_Info(const bool full) const
940     {
941     SpeckleyDomain::Print_Mesh_Info(full);
942     if (full) {
943     std::cout << " Id Coordinates" << std::endl;
944     std::cout.precision(15);
945     std::cout.setf(std::ios::scientific, std::ios::floatfield);
946     for (index_t i=0; i < getNumNodes(); i++) {
947     std::cout << " " << std::setw(5) << m_nodeId[i]
948     << " " << getLocalCoordinate(i%m_NN[0], 0)
949     << " " << getLocalCoordinate(i/m_NN[0], 1) << std::endl;
950     }
951     }
952     }
953    
954    
955     //protected
956     void Rectangle::assembleCoordinates(escript::Data& arg) const
957     {
958     int numDim = m_numDim;
959 sshaw 5775 if (!arg.isDataPointShapeEqual(1, &numDim))
960 sshaw 5123 throw SpeckleyException("setToX: Invalid Data object shape");
961 sshaw 5775 if (!arg.numSamplesEqual(1, getNumNodes()))
962 sshaw 5123 throw SpeckleyException("setToX: Illegal number of samples in Data object");
963    
964     const dim_t NN0 = m_NN[0];
965     const dim_t NN1 = m_NN[1];
966     arg.requireWrite();
967     #pragma omp parallel for
968     for (dim_t y = 0; y < NN1; y++) {
969     for (dim_t x = 0; x < NN0; x++) {
970 sshaw 5128 double *point = arg.getSampleDataRW(y*NN0 + x);
971 sshaw 5123 point[0] = getLocalCoordinate(x, 0);
972     point[1] = getLocalCoordinate(y, 1);
973     }
974     }
975     }
976    
977     //protected
978     void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
979     {
980     escript::Data converted;
981    
982     if (in.getFunctionSpace().getTypeCode() != Elements) {
983     converted = escript::Data(in, escript::function(*this));
984     } else {
985     converted = in;
986     }
987    
988     if (m_order == 2) {
989     gradient_order2(out,converted);
990     } else if (m_order == 3) {
991     gradient_order3(out,converted);
992     } else if (m_order == 4) {
993     gradient_order4(out,converted);
994     } else if (m_order == 5) {
995     gradient_order5(out,converted);
996     } else if (m_order == 6) {
997     gradient_order6(out,converted);
998     } else if (m_order == 7) {
999     gradient_order7(out,converted);
1000     } else if (m_order == 8) {
1001     gradient_order8(out,converted);
1002     } else if (m_order == 9) {
1003     gradient_order9(out,converted);
1004     } else if (m_order == 10) {
1005     gradient_order10(out,converted);
1006     }
1007     }
1008    
1009     //protected
1010     void Rectangle::assembleIntegrate(std::vector<double>& integrals,
1011     const escript::Data& arg) const
1012     {
1013     const int fs = arg.getFunctionSpace().getTypeCode();
1014     if (fs != Elements)
1015     throw new SpeckleyException("Speckley doesn't currently support integrals of non-Element functionspaces");
1016     if (!arg.actsExpanded())
1017     throw new SpeckleyException("Speckley doesn't currently support unexpanded data");
1018 sshaw 5191
1019 sshaw 5123 if (m_order == 2) {
1020     integral_order2(integrals, arg);
1021     } else if (m_order == 3) {
1022     integral_order3(integrals, arg);
1023     } else if (m_order == 4) {
1024     integral_order4(integrals, arg);
1025     } else if (m_order == 5) {
1026     integral_order5(integrals, arg);
1027     } else if (m_order == 6) {
1028     integral_order6(integrals, arg);
1029     } else if (m_order == 7) {
1030     integral_order7(integrals, arg);
1031     } else if (m_order == 8) {
1032     integral_order8(integrals, arg);
1033     } else if (m_order == 9) {
1034     integral_order9(integrals, arg);
1035     } else if (m_order == 10) {
1036     integral_order10(integrals, arg);
1037     }
1038     }
1039    
1040     /* This is a wrapper for filtered (and non-filtered) randoms
1041     * For detailed doco see randomFillWorker
1042 sshaw 5191 */
1043 sshaw 5123 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
1044     const escript::FunctionSpace& fs,
1045     long seed, const boost::python::tuple& filter) const {
1046     int numvals=escript::DataTypes::noValues(shape);
1047     int per_element = (m_order+1)*(m_order+1)*numvals;
1048     if (len(filter)>0) {
1049     throw SpeckleyException("Speckley does not support filters.");
1050     }
1051    
1052     double* src=new double[m_NE[0]*m_NE[1]*per_element*numvals];
1053 caltinay 5958 escript::randomFillArray(seed, src, m_NE[0]*m_NE[1]*per_element);
1054 sshaw 5123 escript::Data res(0, shape, escript::function(*this), true);
1055     int current = 0;
1056     for (int ei = 0; ei < m_NE[1]; ++ei) {
1057     for (int ej = 0; ej < m_NE[0]; ++ej) {
1058     double *e = res.getSampleDataRW(INDEX2(ej,ei,m_NE[0]));
1059     memcpy(e, &src[current], sizeof(double)*per_element);
1060     current += per_element;
1061     }
1062     }
1063     if (res.getFunctionSpace() != fs) {
1064     return escript::Data(res, fs);
1065     }
1066     return res;
1067     }
1068    
1069     //private
1070     void Rectangle::populateSampleIds()
1071     {
1072     // degrees of freedom are numbered from left to right, bottom to top in
1073     // each rank, continuing on the next rank (ranks also go left-right,
1074     // bottom-top).
1075     // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1076     // helps when writing out data rank after rank.
1077    
1078     // build node distribution vector first.
1079     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1080     // constant for all ranks in this implementation
1081     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1082     const index_t left = (m_offset[0]==0 ? 0 : 1);
1083     const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1084     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1085     index_t rank_left = (k-1)%m_NX[0] == 0 ? 0 : 1;
1086     index_t rank_bottom = (k-1)/m_NX[0] == 0 ? 0 : 1;
1087 sshaw 5191 m_nodeDistribution[k] = m_nodeDistribution[k-1]
1088 sshaw 5123 + (m_NN[0]-rank_left)*(m_NN[1]-rank_bottom);
1089     }
1090     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1091     try {
1092     m_nodeId.resize(getNumNodes());
1093     m_elementId.resize(getNumElements());
1094     } catch (const std::length_error& le) {
1095     throw SpeckleyException("The system does not have sufficient memory for a domain of this size.");
1096     }
1097    
1098     // populate face element counts
1099     //left
1100     if (m_offset[0]==0)
1101     m_faceCount[0]=m_NE[1];
1102     else
1103     m_faceCount[0]=0;
1104     //right
1105     if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1106     m_faceCount[1]=m_NE[1];
1107     else
1108     m_faceCount[1]=0;
1109     //bottom
1110     if (m_offset[1]==0)
1111     m_faceCount[2]=m_NE[0];
1112     else
1113     m_faceCount[2]=0;
1114     //top
1115     if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1116     m_faceCount[3]=m_NE[0];
1117     else
1118     m_faceCount[3]=0;
1119    
1120    
1121     if (bottom && left) {
1122     //get lower-left node
1123     m_nodeId[0] = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]] - 1;
1124     }
1125     if (bottom) {
1126 sshaw 5191 //DOF size of the neighbouring rank
1127     index_t rankDOF = m_nodeDistribution[m_mpiInfo->rank - m_NX[0] + 1]
1128 sshaw 5123 - m_nodeDistribution[m_mpiInfo->rank - m_NX[0]];
1129     //beginning of last row of neighbouring rank
1130     index_t begin = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]]
1131     + rankDOF - m_NN[0];
1132     for (index_t x = left; x < m_NN[0]; x++) {
1133     m_nodeId[x] = begin + x;
1134     }
1135     }
1136     if (left) {
1137     //is the rank to the left itself right of another rank
1138     index_t rank_left = (m_mpiInfo->rank - 1) % m_NX[0] == 0 ? 0 : 1;
1139     //end of first owned row of neighbouring rank
1140 sshaw 5191 index_t end = m_nodeDistribution[m_mpiInfo->rank - 1]
1141 sshaw 5123 + m_NN[0] - rank_left - 1;
1142     for (index_t y = bottom; y < m_NN[1]; y++) {
1143     m_nodeId[y*m_NN[0]] = end + (y-bottom)*(m_NN[0]-rank_left);
1144     }
1145     }
1146    
1147     #pragma omp parallel for
1148     for (index_t y = bottom; y < m_NN[1]; y++) {
1149     for (index_t x = left; x < m_NN[0]; x++) {
1150     m_nodeId[y*m_NN[0]+x] = m_nodeDistribution[m_mpiInfo->rank] + (y-bottom)*(m_NN[0]-left) + (x-left);
1151     }
1152     }
1153    
1154     m_nodeTags.assign(getNumNodes(), 0);
1155     updateTagsInUse(Nodes);
1156    
1157     m_elementTags.assign(getNumElements(), 0);
1158     updateTagsInUse(Elements);
1159     }
1160    
1161     //private
1162     void Rectangle::addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
1163     const std::vector<double>& EM_S, const std::vector<double>& EM_F, bool addS,
1164 caltinay 5194 bool addF, index_t firstNode, int nEq, int nComp) const
1165 sshaw 5123 {
1166     throw SpeckleyException("Rectangle::addToMatrixAndRHS, adding to matrix not supported");
1167     }
1168    
1169 sshaw 5478 void Rectangle::reduceElements(escript::Data& out, const escript::Data& in) const
1170     {
1171     if (m_order == 2) {
1172     reduction_order2(in, out);
1173     } else if (m_order == 3) {
1174     reduction_order3(in, out);
1175     } else if (m_order == 4) {
1176     reduction_order4(in, out);
1177     } else if (m_order == 5) {
1178     reduction_order5(in, out);
1179     } else if (m_order == 6) {
1180     reduction_order6(in, out);
1181     } else if (m_order == 7) {
1182     reduction_order7(in, out);
1183     } else if (m_order == 8) {
1184     reduction_order8(in, out);
1185     } else if (m_order == 9) {
1186     reduction_order9(in, out);
1187     } else if (m_order == 10) {
1188     reduction_order10(in, out);
1189     }
1190     }
1191    
1192 sshaw 5123 //protected
1193     void Rectangle::interpolateNodesOnElements(escript::Data& out,
1194 sshaw 5478 const escript::Data& in,
1195     bool reduced) const
1196 sshaw 5123 {
1197     const dim_t numComp = in.getDataPointSize();
1198     const dim_t NE0 = m_NE[0];
1199     const dim_t NE1 = m_NE[1];
1200     const int quads = m_order + 1;
1201     const int max_x = m_NN[0];
1202     out.requireWrite();
1203 sshaw 5478 if (reduced) { //going to ReducedElements
1204     escript::Data funcIn(in, escript::function(*this));
1205     reduceElements(out, funcIn);
1206     return;
1207     }
1208 sshaw 5123 #pragma omp parallel for
1209     for (dim_t ey = 0; ey < NE1; ey++) {
1210     for (dim_t ex = 0; ex < NE0; ex++) {
1211     double *e_out = out.getSampleDataRW(ex + ey*NE0);
1212 sshaw 5177 dim_t start = ex*m_order + ey*max_x*m_order;
1213 sshaw 5123 int quad = 0;
1214     for (int qy = 0; qy < quads; qy++) {
1215     for (int qx = 0; qx < quads; qx++, quad++) {
1216     const double *n_in = in.getSampleDataRO(start + max_x*qy + qx);
1217     memcpy(e_out+quad*numComp, n_in, sizeof(double) * numComp);
1218     }
1219     }
1220     }
1221     }
1222     }
1223    
1224 sshaw 5133 #ifdef ESYS_MPI
1225 sshaw 5123 //protected
1226 sshaw 5132 void Rectangle::balanceNeighbours(escript::Data& data, bool average) const {
1227     if (m_NX[0] * m_NX[1] == 1) {
1228     return;
1229     }
1230     const int numComp = data.getDataPointSize();
1231     const int rx = m_mpiInfo->rank % m_NX[0];
1232     const int ry = m_mpiInfo->rank / m_NX[0];
1233     //include bordering ranks in summation
1234     if (m_NX[1] != 1)
1235     shareVertical(data, rx, ry);
1236     if (m_NX[0] != 1)
1237     shareSides(data, rx, ry);
1238     if (m_NX[0] != 1 && m_NX[1] != 1) {
1239     shareCorners(data, rx, ry);
1240     if (!average)
1241     return;
1242     //averaging out corners
1243     // bottom left
1244     if (rx && ry) {
1245     double *values = data.getSampleDataRW(0);
1246     for (int comp = 0; comp < numComp; comp++) {
1247     values[comp] /= 2;
1248     }
1249     }
1250     // bottom right
1251     if (rx < (m_NX[0] - 1) && ry) {
1252     double *values = data.getSampleDataRW(m_NN[0]-1);
1253     for (int comp = 0; comp < numComp; comp++) {
1254     values[comp] /= 2;
1255     }
1256     }
1257     // top left
1258     if (rx && ry < (m_NX[0] - 1)) {
1259     double *values = data.getSampleDataRW((m_NN[1]-1)*m_NN[0]);
1260     for (int comp = 0; comp < numComp; comp++) {
1261     values[comp] /= 2;
1262     }
1263     }
1264     // top right
1265     if (rx < (m_NX[0] - 1) && ry < (m_NX[0] - 1)) {
1266     double *values = data.getSampleDataRW(m_NN[1]*m_NN[0] - 1);
1267     for (int comp = 0; comp < numComp; comp++) {
1268     values[comp] /= 2;
1269     }
1270     }
1271     }
1272     if (!average)
1273     return;
1274     // average shared-edges in x and y
1275     //left
1276     if (rx) {
1277     #pragma omp parallel for
1278     for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1279     double *values = data.getSampleDataRW(qy*m_NN[0]);
1280     for (int comp = 0; comp < numComp; comp++) {
1281     values[comp] /= 2;
1282     }
1283     }
1284     }
1285     //right
1286     if (rx < m_NX[0] - 1) {
1287     #pragma omp parallel for
1288     for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1289     double *values = data.getSampleDataRW(qy*m_NN[0] + m_NN[0] - 1);
1290     for (int comp = 0; comp < numComp; comp++) {
1291     values[comp] /= 2;
1292     }
1293     }
1294     }
1295     //bottom
1296     if (ry) {
1297     #pragma omp parallel for
1298     for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1299     double *values = data.getSampleDataRW(qx);
1300     for (int comp = 0; comp < numComp; comp++) {
1301     values[comp] /= 2;
1302     }
1303     }
1304     }
1305     //top
1306     if (ry < m_NX[1] - 1) {
1307     const dim_t start = (m_NN[1]-1)*m_NN[0];
1308     #pragma omp parallel for
1309     for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1310     double *values = data.getSampleDataRW(start + qx);
1311     for (int comp = 0; comp < numComp; comp++) {
1312     values[comp] /= 2;
1313     }
1314     }
1315     }
1316     }
1317 sshaw 5133 #endif //#ifdef ESYS_MPI
1318 sshaw 5132
1319     //protected
1320 sshaw 5123 void Rectangle::interpolateElementsOnNodes(escript::Data& out,
1321 sshaw 5158 const escript::Data& in) const {
1322 sshaw 5123 const dim_t numComp = in.getDataPointSize();
1323     const dim_t NE0 = m_NE[0];
1324     const dim_t NE1 = m_NE[1];
1325     const int quads = m_order + 1;
1326     const dim_t max_x = (m_order*NE0) + 1;
1327     const dim_t max_y = (m_order*NE1) + 1;
1328     out.requireWrite();
1329 sshaw 5478 const int inFS = in.getFunctionSpace().getTypeCode();
1330 sshaw 5123 // the summation portion
1331 sshaw 5478 if (inFS == ReducedElements) {
1332     for (dim_t colouring = 0; colouring < 2; colouring++) {
1333 sshaw 5123 #pragma omp parallel for
1334 sshaw 5478 for (dim_t ey = colouring; ey < NE1; ey += 2) {
1335     for (dim_t ex = 0; ex < NE0; ex++) {
1336     dim_t start = ex*m_order + ey*max_x*m_order;
1337     const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1338     for (int qy = 0; qy < quads; qy++) {
1339     for (int qx = 0; qx < quads; qx++) {
1340     double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1341     for (int comp = 0; comp < numComp; comp++) {
1342     n_out[comp] += e_in[comp];
1343     }
1344 sshaw 5123 }
1345     }
1346     }
1347     }
1348     }
1349 sshaw 5478 } else { //inFS == Elements
1350     for (dim_t colouring = 0; colouring < 2; colouring++) {
1351     #pragma omp parallel for
1352     for (dim_t ey = colouring; ey < NE1; ey += 2) {
1353     for (dim_t ex = 0; ex < NE0; ex++) {
1354     dim_t start = ex*m_order + ey*max_x*m_order;
1355     const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1356     for (int qy = 0; qy < quads; qy++) {
1357     for (int qx = 0; qx < quads; qx++) {
1358     double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1359     for (int comp = 0; comp < numComp; comp++) {
1360     n_out[comp] += e_in[INDEX3(comp, qx, qy, numComp, quads)];
1361     }
1362     }
1363     }
1364     }
1365     }
1366     }
1367 sshaw 5123 }
1368 sshaw 5133 #ifdef ESYS_MPI
1369 sshaw 5132 //share and average shared edges/corners
1370     balanceNeighbours(out, true);
1371 sshaw 5133 #endif
1372 sshaw 5132 // for every internal edge in x
1373 sshaw 5123 #pragma omp parallel for
1374     for (dim_t qy = 0; qy < max_y; qy++) {
1375     for (dim_t qx = m_order; qx < max_x - m_order; qx += m_order) {
1376     double *n_out = out.getSampleDataRW(qx + qy*max_x);
1377     for (int comp = 0; comp < numComp; comp++) {
1378     n_out[comp] /= 2;
1379     }
1380     }
1381     }
1382 sshaw 5132
1383     // for every internal edge in y
1384 caltinay 5373 const dim_t order = m_order;
1385 sshaw 5123 #pragma omp parallel for
1386 caltinay 5373 for (dim_t qy = order; qy < max_y - order; qy += order) {
1387 sshaw 5123 for (dim_t qx = 0; qx < max_x; qx ++) {
1388     double *n_out = out.getSampleDataRW(qx + qy*max_x);
1389     for (int comp = 0; comp < numComp; comp++) {
1390     n_out[comp] /= 2;
1391     }
1392     }
1393     }
1394     }
1395    
1396 sshaw 5133 #ifdef ESYS_MPI
1397 sshaw 5132 //private
1398     void Rectangle::shareCorners(escript::Data& out, int rx, int ry) const
1399     {
1400     //setup
1401     const int tag = 0;
1402     MPI_Status status;
1403 sshaw 5584 MPI_Request request[4];
1404 sshaw 5132 const int numComp = out.getDataPointSize();
1405     const int count = 4 * numComp;
1406     std::vector<double> outbuf(count, 0);
1407     std::vector<double> inbuf(count, 0);
1408     const int rank = m_mpiInfo->rank;
1409     //precalc bounds so we can loop nicely, can probably be cleaned up
1410 sshaw 5584 const bool conds[4] = {rx && ry,
1411     rx < (m_NX[0] - 1) && ry,
1412     rx && ry < (m_NX[1] - 1),
1413     rx < (m_NX[0] - 1) && ry < (m_NX[1] - 1)};
1414     const int ranks[4] = {rank-m_NX[0]-1,
1415     rank-m_NX[0]+1,
1416     rank+m_NX[0]-1,
1417     rank+m_NX[0]+1};
1418 sshaw 5132 //fill everything, regardless of whether we're sharing that corner or not
1419     for (int y = 0; y < 2; y++) {
1420     for (int x = 0; x < 2; x++) {
1421     const double *data = out.getSampleDataRO(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1422     std::copy(data, data + numComp, &outbuf[(x + 2*y)*numComp]);
1423     }
1424     }
1425 sshaw 5191
1426 sshaw 5132 //share
1427     for (int i = 0; i < 4; i++) {
1428     if (conds[i]) {
1429 sshaw 5584 MPI_Isend(&outbuf[i], numComp, MPI_DOUBLE, ranks[i], tag,
1430     m_mpiInfo->comm, &request[i]);
1431 sshaw 5132 }
1432     }
1433 sshaw 5123
1434 sshaw 5132 //unpack
1435     for (int y = 0; y < 2; y++) {
1436     for (int x = 0; x < 2; x++) {
1437 sshaw 5584 int i = 2*y+x;
1438     if (conds[i]) {
1439     MPI_Recv(&inbuf[i], numComp, MPI_DOUBLE, ranks[i], tag,
1440     m_mpiInfo->comm, &status);
1441     double *data = out.getSampleDataRW(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1442     for (int comp = 0; comp < numComp; comp++) {
1443     data[comp] += inbuf[i*numComp + comp];
1444     }
1445 sshaw 5132 }
1446     }
1447     }
1448 sshaw 5584 for (int i = 0; i < 4; i++) {
1449     if (conds[i]) {
1450     MPI_Wait(request + i, &status);
1451     }
1452     }
1453 sshaw 5132 }
1454    
1455     //private
1456     void Rectangle::shareVertical(escript::Data& out, int rx, int ry) const
1457     {
1458     const int tag = 0;
1459     MPI_Status status;
1460     const int numComp = out.getDataPointSize();
1461     const dim_t count = m_NN[0]*numComp;
1462     const int up_neighbour = m_mpiInfo->rank + m_NX[0];
1463     const int down_neighbour = m_mpiInfo->rank - m_NX[0];
1464     //allocate some space to recieve
1465     std::vector<double> recv(count);
1466     //get our sources
1467     double *top = out.getSampleDataRW((m_NN[1]-1) * m_NN[0]);
1468     double *bottom = out.getSampleDataRW(0);
1469 sshaw 5191
1470 sshaw 5584 MPI_Request request[2];
1471    
1472     if (ry) {
1473     MPI_Isend(bottom, count, MPI_DOUBLE, down_neighbour, tag,
1474     m_mpiInfo->comm, request);
1475     }
1476    
1477     if (ry < m_NX[1] - 1) {
1478     MPI_Isend(top, count, MPI_DOUBLE, up_neighbour, tag,
1479     m_mpiInfo->comm, request+1);
1480     }
1481    
1482     //read down
1483     if (ry) {
1484 sshaw 5132 MPI_Recv(&recv[0], count, MPI_DOUBLE, down_neighbour, tag,
1485 sshaw 5584 m_mpiInfo->comm, &status);
1486    
1487 sshaw 5132 //unpack bottom
1488     for (dim_t i = 0; i < count; i++) {
1489     bottom[i] += recv[i];
1490     }
1491 sshaw 5584 }
1492 sshaw 5132
1493 sshaw 5584 //read up, send up
1494     if (ry < m_NX[1] - 1) {
1495     MPI_Recv(&recv[0], count, MPI_DOUBLE, up_neighbour, tag,
1496     m_mpiInfo->comm, &status);
1497    
1498     //unpack up
1499     for (dim_t i = 0; i < count; i++) {
1500     top[i] += recv[i];
1501 sshaw 5132 }
1502     }
1503 sshaw 5584
1504     if (ry) {
1505     MPI_Wait(request, &status);
1506     }
1507     if (ry < m_NX[1] - 1) {
1508     MPI_Wait(request+1, &status);
1509     }
1510 sshaw 5132 }
1511    
1512     //private
1513     void Rectangle::shareSides(escript::Data& out, int rx, int ry) const
1514     {
1515     const int tag = 0;
1516     MPI_Status status;
1517     const int numComp = out.getDataPointSize();
1518 sshaw 5177 const dim_t count = m_NN[1]*numComp;
1519 sshaw 5132 const int left_neighbour = m_mpiInfo->rank - 1;
1520     const int right_neighbour = m_mpiInfo->rank + 1;
1521     //allocate some space
1522     std::vector<double> left(count,170);
1523     std::vector<double> right(count,17000);
1524     std::vector<double> recv(count,1700);
1525 sshaw 5584
1526     MPI_Request request[2];
1527    
1528     if (rx) {
1529     for (dim_t n = 0; n < m_NN[1]; n++) {
1530     index_t index = n*m_NN[0];
1531     const double *leftData = out.getSampleDataRO(index);
1532     std::copy(leftData, leftData + numComp, &left[n*numComp]);
1533     }
1534     MPI_Isend(&left[0], count, MPI_DOUBLE, left_neighbour, tag,
1535     m_mpiInfo->comm, request);
1536 sshaw 5132 }
1537 sshaw 5191
1538 sshaw 5584 if (rx < m_NX[0] - 1) {
1539     for (dim_t n = 0; n < m_NN[1]; n++) {
1540     index_t index = n*m_NN[0];
1541     const double *rightData = out.getSampleDataRO(index+m_NN[0]-1);
1542     std::copy(rightData, rightData + numComp, &right[n*numComp]);
1543 sshaw 5132 }
1544 sshaw 5584 MPI_Isend(&right[0], count, MPI_DOUBLE, right_neighbour, tag,
1545     m_mpiInfo->comm, request+1);
1546     }
1547    
1548     //read left
1549     if (rx) {
1550 sshaw 5132 MPI_Recv(&recv[0], count, MPI_DOUBLE, left_neighbour, tag,
1551 sshaw 5584 m_mpiInfo->comm, &status);
1552 sshaw 5132 //unpack to left
1553     for (dim_t i = 0; i < m_NN[1]; i++) {
1554     double *data = out.getSampleDataRW(i*m_NN[0]);
1555     for (int comp = 0; comp < numComp; comp++) {
1556     data[comp] += recv[i*numComp+comp];
1557     }
1558     }
1559 sshaw 5584 }
1560 sshaw 5132
1561 sshaw 5584 //read right
1562     if (rx < m_NX[0] - 1) {
1563     MPI_Recv(&recv[0], count, MPI_DOUBLE, right_neighbour, tag,
1564     m_mpiInfo->comm, &status);
1565     //unpack to right
1566     for (dim_t i = 0; i < m_NN[1]; i++) {
1567     double *data = out.getSampleDataRW((i + 1) * m_NN[0] - 1);
1568     for (int comp = 0; comp < numComp; comp++) {
1569     data[comp] += recv[i*numComp+comp];
1570 sshaw 5132 }
1571     }
1572     }
1573 sshaw 5584 if (rx) {
1574     MPI_Wait(request, &status);
1575     }
1576     if (rx < m_NX[0] - 1) {
1577     MPI_Wait(request+1, &status);
1578     }
1579 sshaw 5132 }
1580 sshaw 5133 #endif //#ifdef ESYS_MPI
1581 sshaw 5132
1582 sshaw 5123 dim_t Rectangle::findNode(const double *coords) const
1583     {
1584     const dim_t NOT_MINE = -1;
1585     //is the found element even owned by this rank
1586     for (int dim = 0; dim < m_numDim; dim++) {
1587     double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
1588     - m_dx[dim]/2.; //allows for point outside mapping onto node
1589     double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
1590     + m_dx[dim]/2.;
1591     if (min > coords[dim] || max < coords[dim]) {
1592     return NOT_MINE;
1593     }
1594     }
1595 sshaw 5189 // get distance from origin
1596     double x = coords[0] - m_origin[0];
1597     double y = coords[1] - m_origin[1];
1598 sshaw 5123
1599     //check if the point is even inside the domain
1600     if (x < 0 || y < 0 || x > m_length[0] || y > m_length[1])
1601     return NOT_MINE;
1602    
1603 sshaw 5189 // trim to rank reference point
1604     x -= m_offset[0] * m_dx[0];
1605     y -= m_offset[1] * m_dx[1];
1606    
1607 sshaw 5123 // distance in elements
1608 sshaw 5173 dim_t ex = (dim_t) floor((x + 0.01*m_dx[0]) / m_dx[0]);
1609     dim_t ey = (dim_t) floor((y + 0.01*m_dx[1]) / m_dx[1]);
1610 sshaw 5123 // set the min distance high enough to be outside the element plus a bit
1611     dim_t closest = NOT_MINE;
1612     double minDist = 1;
1613     for (int dim = 0; dim < m_numDim; dim++) {
1614     minDist += m_dx[dim]*m_dx[dim];
1615     }
1616     //find the closest node
1617     for (int dx = 0; dx < 2; dx++) {
1618     double xdist = x - (ex + dx)*m_dx[0];
1619     for (int dy = 0; dy < 2; dy++) {
1620     double ydist = y - (ey + dy)*m_dx[1];
1621     double total = xdist*xdist + ydist*ydist;
1622     if (total < minDist) {
1623 sshaw 5189 closest = (ex+dx)*m_order + (ey+dy)*m_order*m_NN[0];
1624 sshaw 5123 minDist = total;
1625     }
1626     }
1627     }
1628     //if this happens, we've let a dirac point slip through, which is awful
1629     if (closest == NOT_MINE) {
1630     throw SpeckleyException("Unable to map appropriate dirac point to a node,"
1631     " implementation problem in Rectangle::findNode()");
1632     }
1633     return closest;
1634     }
1635    
1636     Assembler_ptr Rectangle::createAssembler(std::string type,
1637 sshaw 5574 const DataMap& options) const
1638     {
1639 sshaw 5123 if (type.compare("DefaultAssembler") == 0) {
1640 caltinay 5194 return Assembler_ptr(new DefaultAssembler2D(shared_from_this(), m_dx, m_NE, m_NN));
1641 sshaw 5574 } else if (type.compare("WaveAssembler") == 0) {
1642     return Assembler_ptr(new WaveAssembler2D(shared_from_this(), m_dx, m_NE, m_NN, options));
1643 sshaw 5123 }
1644     throw SpeckleyException("Speckley::Rectangle does not support the"
1645     " requested assembler");
1646     }
1647    
1648 sshaw 5191 bool Rectangle::probeInterpolationAcross(int fsType_source,
1649     const escript::AbstractDomain& domain, int fsType_target) const
1650     {
1651 sshaw 5226 #ifdef USE_RIPLEY
1652     return speckley::probeInterpolationAcross(fsType_source, domain,
1653     fsType_target, 2);
1654     #else
1655     return false;
1656     #endif
1657 sshaw 5191 }
1658    
1659     void Rectangle::interpolateAcross(escript::Data& target, const escript::Data& source) const
1660     {
1661 sshaw 5226 #ifdef USE_RIPLEY
1662     if (coupler == NULL) {
1663 sshaw 5246 coupler = new RipleyCoupler(this, m_dx, m_mpiInfo->rank);
1664 sshaw 5191 }
1665 sshaw 5226 coupler->interpolate(target, source);
1666     #else
1667     throw SpeckleyException("Speckley::Rectangle interpolation to unsupported domain");
1668     #endif
1669 sshaw 5191 }
1670    
1671 sshaw 5123 } // end of namespace speckley
1672    

  ViewVC Help
Powered by ViewVC 1.1.26