/[escript]/trunk/speckley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/speckley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6141 - (show annotations)
Wed Apr 6 03:51:30 2016 UTC (2 years, 8 months ago) by caltinay
File size: 59160 byte(s)
more namespacing of defines.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <speckley/Rectangle.h>
18 #include <speckley/DefaultAssembler2D.h>
19 #include <speckley/WaveAssembler2D.h>
20 #ifdef USE_RIPLEY
21 #include <speckley/CrossDomainCoupler.h>
22 #endif
23
24 #include <escript/index.h>
25 #include <escript/FileWriter.h>
26 #include <escript/FunctionSpaceFactory.h>
27 #include <escript/Random.h>
28
29 #include <boost/scoped_array.hpp>
30 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
31
32 #ifdef USE_NETCDF
33 #include <netcdfcpp.h>
34 #endif
35
36 #if USE_SILO
37 #include <silo.h>
38 #ifdef ESYS_MPI
39 #include <pmpio.h>
40 #endif
41 #endif
42
43 #include <algorithm>
44 #include <iomanip>
45 #include <limits>
46
47 namespace bm=boost::math;
48 using escript::FileWriter;
49
50 namespace speckley {
51
52 Rectangle::Rectangle(int order, dim_t n0, dim_t n1, double x0, double y0, double x1,
53 double y1, int d0, int d1,
54 const std::vector<double>& points,
55 const std::vector<int>& tags,
56 const TagMap& tagnamestonums,
57 escript::SubWorld_ptr w) :
58 SpeckleyDomain(2, order, w)
59 {
60 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
61 > std::numeric_limits<dim_t>::max())
62 throw SpeckleyException("The number of elements has overflowed, this "
63 "limit may be raised in future releases.");
64
65 if (n0 <= 0 || n1 <= 0)
66 throw SpeckleyException("Number of elements in each spatial dimension "
67 "must be positive");
68
69 // ignore subdivision parameters for serial run
70 if (m_mpiInfo->size == 1) {
71 d0=1;
72 d1=1;
73 }
74
75 bool warn=false;
76 std::vector<int> factors;
77 int ranks = m_mpiInfo->size;
78 dim_t epr[2] = {n0,n1};
79 int d[2] = {d0,d1};
80 if (d0<=0 || d1<=0) {
81 for (int i = 0; i < 2; i++) {
82 if (d[i] < 1) {
83 d[i] = 1;
84 continue;
85 }
86 epr[i] = -1; // can no longer be max
87 //remove
88 if (ranks % d[i] != 0) {
89 throw SpeckleyException("Invalid number of spatial subdivisions");
90 }
91 ranks /= d[i];
92 }
93 factorise(factors, ranks);
94 if (factors.size() != 0) {
95 warn = true;
96 }
97 }
98
99 while (factors.size() > 0) {
100 int i = epr[0] > epr[1] ? 0 : 1;
101 int f = factors.back();
102 factors.pop_back();
103 d[i] *= f;
104 epr[i] /= f;
105 }
106 d0 = d[0]; d1 = d[1];
107
108 // ensure number of subdivisions is valid and nodes can be distributed
109 // among number of ranks
110 if (d0*d1 != m_mpiInfo->size)
111 throw SpeckleyException("Invalid number of spatial subdivisions");
112
113 if (warn) {
114 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
115 << d1 << "). This may not be optimal!" << std::endl;
116 }
117
118 double l0 = x1-x0;
119 double l1 = y1-y0;
120 m_dx[0] = l0/n0;
121 m_dx[1] = l1/n1;
122
123 if (n0 % d0 > 0) {
124 n0 += d0 - (n0 % d0);
125 l0 = m_dx[0]*n0;
126 std::cout << "Warning: Adjusted number of elements and length. N0="
127 << n0 << ", l0=" << l0 << std::endl;
128 }
129 if (n1 % d1 > 0) {
130 n1 += d1 - (n1 % d1);
131 l1 = m_dx[1]*n1;
132 std::cout << "Warning: Adjusted number of elements and length. N1="
133 << n1 << ", l1=" << l1 << std::endl;
134 }
135
136 if (n0/d0 < 2 || n1/d1 < 2)
137 throw SpeckleyException("Too few elements for the number of ranks");
138
139 m_gNE[0] = n0;
140 m_gNE[1] = n1;
141 m_origin[0] = x0;
142 m_origin[1] = y0;
143 m_length[0] = l0;
144 m_length[1] = l1;
145 m_NX[0] = d0;
146 m_NX[1] = d1;
147
148 // local number of elements
149 m_NE[0] = m_gNE[0] / d0;
150 m_NE[1] = m_gNE[1] / d1;
151
152 // local number of nodes
153 m_NN[0] = m_NE[0]*m_order+1;
154 m_NN[1] = m_NE[1]*m_order+1;
155
156 // bottom-left node is at (offset0,offset1) in global mesh
157 m_offset[0] = n0/d0*(m_mpiInfo->rank%d0);
158 m_offset[1] = n1/d1*(m_mpiInfo->rank/d0);
159
160 populateSampleIds();
161
162 for (TagMap::const_iterator i = tagnamestonums.begin();
163 i != tagnamestonums.end(); i++) {
164 setTagMap(i->first, i->second);
165 }
166 addPoints(points, tags);
167
168
169 #ifdef USE_RIPLEY
170 coupler = NULL;
171 #endif
172 }
173
174 Rectangle::~Rectangle()
175 {
176 #ifdef USE_RIPLEY
177 if (coupler != NULL)
178 delete coupler;
179 #endif
180 }
181
182 std::string Rectangle::getDescription() const
183 {
184 return "speckley::Rectangle";
185 }
186
187 bool Rectangle::operator==(const escript::AbstractDomain& other) const
188 {
189 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
190 if (o) {
191 return (SpeckleyDomain::operator==(other) &&
192 m_order == o->m_order &&
193 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
194 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
195 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
196 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
197 }
198
199 return false;
200 }
201
202 void Rectangle::readNcGrid(escript::Data& out, std::string filename,
203 std::string varname, const ReaderParameters& params) const
204 {
205 #ifdef USE_NETCDF
206 // check destination function space
207 dim_t myN0, myN1;
208 if (out.getFunctionSpace().getTypeCode() == Nodes) {
209 myN0 = m_NE[0] + 1;
210 myN1 = m_NE[1] + 1;
211 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
212 // myN0 = m_NE[0];
213 // myN1 = m_NE[1];
214 } else
215 throw SpeckleyException("readNcGrid(): invalid function space for output data object");
216
217 if (params.first.size() != 2)
218 throw SpeckleyException("readNcGrid(): argument 'first' must have 2 entries");
219
220 if (params.numValues.size() != 2)
221 throw SpeckleyException("readNcGrid(): argument 'numValues' must have 2 entries");
222
223 if (params.multiplier.size() != 2)
224 throw SpeckleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
225 for (size_t i=0; i<params.multiplier.size(); i++)
226 if (params.multiplier[i]<1)
227 throw SpeckleyException("readNcGrid(): all multipliers must be positive");
228 if (params.reverse.size() != 2)
229 throw SpeckleyException("readNcGrid(): argument 'reverse' must have 2 entries");
230
231 // check file existence and size
232 NcFile f(filename.c_str(), NcFile::ReadOnly);
233 if (!f.is_valid())
234 throw SpeckleyException("readNcGrid(): cannot open file");
235
236 NcVar* var = f.get_var(varname.c_str());
237 if (!var)
238 throw SpeckleyException("readNcGrid(): invalid variable");
239
240 // TODO: rank>0 data support
241 const int numComp = out.getDataPointSize();
242 if (numComp > 1)
243 throw SpeckleyException("readNcGrid(): only scalar data supported");
244
245 const int dims = var->num_dims();
246 boost::scoped_array<long> edges(var->edges());
247
248 // is this a slice of the data object (dims!=2)?
249 // note the expected ordering of edges (as in numpy: y,x)
250 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
251 || (dims==1 && params.numValues[1]>1) ) {
252 throw SpeckleyException("readNcGrid(): not enough data in file");
253 }
254
255 // check if this rank contributes anything
256 if (params.first[0] >= m_offset[0]+myN0 ||
257 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
258 params.first[1] >= m_offset[1]+myN1 ||
259 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
260 return;
261
262 // now determine how much this rank has to write
263
264 // first coordinates in data object to write to
265 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
266 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
267 // indices to first value in file (not accounting for reverse yet)
268 dim_t idx0 = std::max(dim_t(0), m_offset[0]-params.first[0]);
269 dim_t idx1 = std::max(dim_t(0), m_offset[1]-params.first[1]);
270 // number of values to read
271 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
272 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
273
274 // make sure we read the right block if going backwards through file
275 if (params.reverse[0])
276 idx0 = edges[dims-1]-num0-idx0;
277 if (dims>1 && params.reverse[1])
278 idx1 = edges[dims-2]-num1-idx1;
279
280 std::vector<double> values(num0*num1);
281 if (dims==2) {
282 var->set_cur(idx1, idx0);
283 var->get(&values[0], num1, num0);
284 } else {
285 var->set_cur(idx0);
286 var->get(&values[0], num0);
287 }
288
289 const int dpp = out.getNumDataPointsPerSample();
290 out.requireWrite();
291
292 // helpers for reversing
293 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
294 const int x_mult = (params.reverse[0] ? -1 : 1);
295 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
296 const int y_mult = (params.reverse[1] ? -1 : 1);
297
298 for (index_t y=0; y<num1; y++) {
299 #pragma omp parallel for
300 for (index_t x=0; x<num0; x++) {
301 const dim_t baseIndex = first0+x*params.multiplier[0]
302 +(first1+y*params.multiplier[1])*myN0;
303 const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
304 if (!bm::isnan(values[srcIndex])) {
305 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
306 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
307 const dim_t dataIndex = baseIndex+m0+m1*myN0;
308 double* dest = out.getSampleDataRW(dataIndex);
309 for (index_t q=0; q<dpp; q++) {
310 *dest++ = values[srcIndex];
311 }
312 }
313 }
314 }
315 }
316 }
317 #else
318 throw SpeckleyException("readNcGrid(): not compiled with netCDF support");
319 #endif
320 }
321
322 void Rectangle::readBinaryGrid(escript::Data& out, std::string filename,
323 const ReaderParameters& params) const
324 {
325 // the mapping is not universally correct but should work on our
326 // supported platforms
327 switch (params.dataType) {
328 case DATATYPE_INT32:
329 readBinaryGridImpl<int>(out, filename, params);
330 break;
331 case DATATYPE_FLOAT32:
332 readBinaryGridImpl<float>(out, filename, params);
333 break;
334 case DATATYPE_FLOAT64:
335 readBinaryGridImpl<double>(out, filename, params);
336 break;
337 default:
338 throw SpeckleyException("readBinaryGrid(): invalid or unsupported datatype");
339 }
340 }
341
342 void Rectangle::readBinaryGridFromZipped(escript::Data& out, std::string filename,
343 const ReaderParameters& params) const
344 {
345 #ifdef ESYS_HAVE_BOOST_IO
346 // the mapping is not universally correct but should work on our
347 // supported platforms
348 switch (params.dataType) {
349 case DATATYPE_INT32:
350 readBinaryGridZippedImpl<int>(out, filename, params);
351 break;
352 case DATATYPE_FLOAT32:
353 readBinaryGridZippedImpl<float>(out, filename, params);
354 break;
355 case DATATYPE_FLOAT64:
356 readBinaryGridZippedImpl<double>(out, filename, params);
357 break;
358 default:
359 throw SpeckleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
360 }
361 #else
362 throw SpeckleyException("readBinaryGridFromZipped(): not built with zip support");
363 #endif
364 }
365
366 template<typename ValueType>
367 void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filename,
368 const ReaderParameters& params) const
369 {
370 // check destination function space
371 dim_t myN0, myN1;
372 if (out.getFunctionSpace().getTypeCode() == Nodes) {
373 myN0 = m_NE[0] + 1;
374 myN1 = m_NE[1] + 1;
375 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
376 // myN0 = m_NE[0];
377 // myN1 = m_NE[1];
378 } else
379 throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
380
381 if (params.first.size() != 2)
382 throw SpeckleyException("readBinaryGrid(): argument 'first' must have 2 entries");
383
384 if (params.numValues.size() != 2)
385 throw SpeckleyException("readBinaryGrid(): argument 'numValues' must have 2 entries");
386
387 if (params.multiplier.size() != 2)
388 throw SpeckleyException("readBinaryGrid(): argument 'multiplier' must have 2 entries");
389 for (size_t i=0; i<params.multiplier.size(); i++)
390 if (params.multiplier[i]<1)
391 throw SpeckleyException("readBinaryGrid(): all multipliers must be positive");
392 if (params.reverse[0] != 0 || params.reverse[1] != 0)
393 throw SpeckleyException("readBinaryGrid(): reversing not supported yet");
394
395 // check file existence and size
396 std::ifstream f(filename.c_str(), std::ifstream::binary);
397 if (f.fail()) {
398 throw SpeckleyException("readBinaryGrid(): cannot open file");
399 }
400 f.seekg(0, std::ios::end);
401 const int numComp = out.getDataPointSize();
402 const dim_t filesize = f.tellg();
403 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
404 if (filesize < reqsize) {
405 f.close();
406 throw SpeckleyException("readBinaryGrid(): not enough data in file");
407 }
408
409 // check if this rank contributes anything
410 if (params.first[0] >= m_offset[0]+myN0 ||
411 params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
412 params.first[1] >= m_offset[1]+myN1 ||
413 params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
414 f.close();
415 return;
416 }
417
418 // now determine how much this rank has to write
419
420 // first coordinates in data object to write to
421 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
422 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
423 // indices to first value in file
424 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
425 const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
426 // if restX > 0 the first value in the respective dimension has been
427 // written restX times already in a previous rank so this rank only
428 // contributes (multiplier-rank) copies of that value
429 const dim_t rest0 = m_offset[0]%params.multiplier[0];
430 const dim_t rest1 = m_offset[1]%params.multiplier[1];
431 // number of values to read
432 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
433 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
434
435 out.requireWrite();
436 std::vector<ValueType> values(num0*numComp);
437 const int dpp = out.getNumDataPointsPerSample();
438
439 for (dim_t y=0; y<num1; y++) {
440 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
441 f.seekg(fileofs*sizeof(ValueType));
442 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
443 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
444 dim_t dataYbase = first1+y*params.multiplier[1];
445 if (y>0)
446 dataYbase -= rest1;
447 for (dim_t x=0; x<num0; x++) {
448 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
449 dim_t dataXbase = first0+x*params.multiplier[0];
450 if (x>0)
451 dataXbase -= rest0;
452 // write a block of mult0 x mult1 identical values into Data object
453 for (dim_t m1=0; m1<m1limit; m1++) {
454 const dim_t dataY = dataYbase+m1;
455 if (dataY >= myN1)
456 break;
457 for (dim_t m0=0; m0<m0limit; m0++) {
458 const dim_t dataX = dataXbase+m0;
459 if (dataX >= myN0)
460 break;
461 const dim_t dataIndex = dataX+dataY*m_NN[0];
462 double* dest = out.getSampleDataRW(dataIndex*m_order);
463 for (int c=0; c<numComp; c++) {
464 ValueType val = values[x*numComp+c];
465 if (params.byteOrder != BYTEORDER_NATIVE) {
466 char* cval = reinterpret_cast<char*>(&val);
467 // this will alter val!!
468 if (sizeof(ValueType) > 4) {
469 byte_swap64(cval);
470 } else {
471 byte_swap32(cval);
472 }
473 }
474 if (!bm::isnan(val)) {
475 for (int q=0; q<dpp; q++) {
476 *dest++ = static_cast<double>(val);
477 }
478 }
479 }
480 }
481 }
482 }
483 }
484
485 f.close();
486 interpolateFromCorners(out);
487 }
488
489 #ifdef ESYS_HAVE_BOOST_IO
490 template<typename ValueType>
491 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const std::string& filename,
492 const ReaderParameters& params) const
493 {
494 // check destination function space
495 dim_t myN0, myN1;
496 if (out.getFunctionSpace().getTypeCode() == Nodes) {
497 myN0 = m_NE[0] + 1;
498 myN1 = m_NE[1] + 1;
499 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
500 // myN0 = m_NE[0];
501 // myN1 = m_NE[1];
502 } else
503 throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
504
505 // check file existence and size
506 std::ifstream f(filename.c_str(), std::ifstream::binary);
507 if (f.fail()) {
508 throw SpeckleyException(strerror(errno));//"readBinaryGridFromZipped(): cannot open file");
509 }
510 f.seekg(0, std::ios::end);
511 const int numComp = out.getDataPointSize();
512 dim_t filesize = f.tellg();
513 f.seekg(0, std::ios::beg);
514 std::vector<char> compressed(filesize);
515 f.read((char*)&compressed[0], filesize);
516 f.close();
517 std::vector<char> decompressed = unzip(compressed);
518 filesize = decompressed.size();
519 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
520 if (filesize < reqsize) {
521 throw SpeckleyException("readBinaryGridFromZipped(): not enough data in file");
522 }
523
524 // check if this rank contributes anything
525 if (params.first[0] >= m_offset[0]+myN0 ||
526 params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
527 params.first[1] >= m_offset[1]+myN1 ||
528 params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
529 return;
530 }
531
532 // now determine how much this rank has to write
533
534 // first coordinates in data object to write to
535 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
536 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
537 // indices to first value in file
538 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
539 const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
540 // if restX > 0 the first value in the respective dimension has been
541 // written restX times already in a previous rank so this rank only
542 // contributes (multiplier-rank) copies of that value
543 const dim_t rest0 = m_offset[0]%params.multiplier[0];
544 const dim_t rest1 = m_offset[1]%params.multiplier[1];
545 // number of values to read
546 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
547 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
548
549 out.requireWrite();
550 std::vector<ValueType> values(num0*numComp);
551 const int dpp = out.getNumDataPointsPerSample();
552
553 for (dim_t y=0; y<num1; y++) {
554 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
555 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
556 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
557 dim_t dataYbase = first1+y*params.multiplier[1];
558 if (y>0)
559 dataYbase -= rest1;
560 for (dim_t x=0; x<num0; x++) {
561 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
562 dim_t dataXbase = first0+x*params.multiplier[0];
563 if (x>0)
564 dataXbase -= rest0;
565 // write a block of mult0 x mult1 identical values into Data object
566 for (dim_t m1=0; m1<m1limit; m1++) {
567 const dim_t dataY = dataYbase+m1;
568 if (dataY >= myN1)
569 break;
570 for (dim_t m0=0; m0<m0limit; m0++) {
571 const dim_t dataX = dataXbase+m0;
572 if (dataX >= myN0)
573 break;
574 const dim_t dataIndex = dataX+dataY*m_NN[0];
575 double* dest = out.getSampleDataRW(dataIndex*m_order);
576 for (int c=0; c<numComp; c++) {
577 ValueType val = values[x*numComp+c];
578
579 if (params.byteOrder != BYTEORDER_NATIVE) {
580 char* cval = reinterpret_cast<char*>(&val);
581 // this will alter val!!
582 if (sizeof(ValueType) > 4) {
583 byte_swap64(cval);
584 } else {
585 byte_swap32(cval);
586 }
587 }
588 if (!bm::isnan(val)) {
589 for (int q=0; q<dpp; q++) {
590 *dest++ = static_cast<double>(val);
591 }
592 }
593 }
594 }
595 }
596 }
597 }
598 interpolateFromCorners(out);
599 }
600 #endif
601
602 void Rectangle::interpolateFromCorners(escript::Data &out) const
603 {
604 const int numComp = out.getDataPointSize();
605 //interpolate the missing portions
606 #pragma omp parallel for
607 for (dim_t y = 0; y < m_NN[1]; y++) {
608 for (dim_t x = 0; x < m_NN[0]; x++) {
609 //skip the points we have values for
610 if (y % m_order == 0 && x % m_order == 0)
611 continue;
612 //point location in element: x,y
613 const double px = point_locations[m_order-2][x%m_order];
614 const double py = point_locations[m_order-2][y%m_order];
615
616 double *point = out.getSampleDataRW(INDEX2(x, y, m_NN[0]));
617
618 const dim_t left = x - x%m_order;
619 const dim_t right = left < m_NN[0] - 1 ? left + m_order : left;
620 const dim_t front = y - y%m_order;
621 const dim_t back = front < m_NN[1] - 1 ? front + m_order : front;
622
623
624 const double *lowleft = out.getSampleDataRO(
625 INDEX2(left, front, m_NN[0]));
626 const double *lowright = out.getSampleDataRO(
627 INDEX2(right, front, m_NN[0]));
628 const double *highleft = out.getSampleDataRO(
629 INDEX2(left, back, m_NN[0]));
630 const double *highright = out.getSampleDataRO(
631 INDEX2(right, back, m_NN[0]));
632
633 for (int comp = 0; comp < numComp; comp++) {
634 point[comp] = highright[comp]*px*py
635 + highleft[comp]*(1-px)*py
636 + lowright[comp]*px*(1-py)
637 + lowleft[comp]*(1-px)*(1-py);
638 }
639 }
640 }
641 }
642
643 void Rectangle::writeBinaryGrid(const escript::Data& in, std::string filename,
644 int byteOrder, int dataType) const
645 {
646 // the mapping is not universally correct but should work on our
647 // supported platforms
648 switch (dataType) {
649 case DATATYPE_INT32:
650 writeBinaryGridImpl<int>(in, filename, byteOrder);
651 break;
652 case DATATYPE_FLOAT32:
653 writeBinaryGridImpl<float>(in, filename, byteOrder);
654 break;
655 case DATATYPE_FLOAT64:
656 writeBinaryGridImpl<double>(in, filename, byteOrder);
657 break;
658 default:
659 throw SpeckleyException("writeBinaryGrid(): invalid or unsupported datatype");
660 }
661 }
662
663 template<typename ValueType>
664 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
665 const std::string& filename, int byteOrder) const
666 {
667 // check function space and determine number of points
668 dim_t myN0, myN1;
669 dim_t totalN0, totalN1;
670 if (in.getFunctionSpace().getTypeCode() == Nodes) {
671 myN0 = m_NE[0]+1;
672 myN1 = m_NE[1]+1;
673 totalN0 = m_gNE[0]+1;
674 totalN1 = m_gNE[1]+1;
675 // } else if (in.getFunctionSpace().getTypeCode() == Elements) {
676 // myN0 = m_NE[0];
677 // myN1 = m_NE[1];
678 // totalN0 = m_gNE[0];
679 // totalN1 = m_gNE[1];
680 } else
681 throw SpeckleyException("writeBinaryGrid(): invalid function space of data object");
682
683 const int numComp = in.getDataPointSize();
684 const int dpp = in.getNumDataPointsPerSample();
685
686 if (numComp > 1 || dpp > 1)
687 throw SpeckleyException("writeBinaryGrid(): only scalar, single-value data supported");
688
689 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
690
691 // from here on we know that each sample consists of one value
692 FileWriter fw;
693 fw.openFile(filename, fileSize);
694 MPIBarrier();
695
696 for (index_t y=0; y<myN1; y++) {
697 const dim_t fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
698 std::ostringstream oss;
699
700 for (index_t x=0; x<myN0; x++) {
701 const double* sample = in.getSampleDataRO((y*m_NN[0]+x)*m_order);
702 ValueType fvalue = static_cast<ValueType>(*sample);
703 if (byteOrder == BYTEORDER_NATIVE) {
704 oss.write((char*)&fvalue, sizeof(fvalue));
705 } else {
706 char* value = reinterpret_cast<char*>(&fvalue);
707 if (sizeof(fvalue)>4) {
708 byte_swap64(value);
709 } else {
710 byte_swap32(value);
711 }
712 oss.write(value, sizeof(fvalue));
713 }
714 }
715 fw.writeAt(oss, fileofs);
716 }
717 fw.close();
718 }
719
720 void Rectangle::write(const std::string& filename) const
721 {
722 throw SpeckleyException("write: not supported");
723 }
724
725 void Rectangle::dump(const std::string& fileName) const
726 {
727 #if USE_SILO
728 std::string fn(fileName);
729 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
730 fn+=".silo";
731 }
732
733 int driver=DB_HDF5;
734 DBfile* dbfile = NULL;
735 const char* blockDirFmt = "/block%04d";
736
737 #ifdef ESYS_MPI
738 PMPIO_baton_t* baton = NULL;
739 const int NUM_SILO_FILES = 1;
740 #endif
741
742 if (m_mpiInfo->size > 1) {
743 #ifdef ESYS_MPI
744 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
745 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
746 PMPIO_DefaultClose, (void*)&driver);
747 // try the fallback driver in case of error
748 if (!baton && driver != DB_PDB) {
749 driver = DB_PDB;
750 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
751 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
752 PMPIO_DefaultClose, (void*)&driver);
753 }
754 if (baton) {
755 char siloPath[64];
756 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
757 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
758 }
759 #endif
760 } else {
761 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
762 getDescription().c_str(), driver);
763 // try the fallback driver in case of error
764 if (!dbfile && driver != DB_PDB) {
765 driver = DB_PDB;
766 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
767 getDescription().c_str(), driver);
768 }
769 char siloPath[64];
770 snprintf(siloPath, 64, blockDirFmt, 0);
771 DBMkDir(dbfile, siloPath);
772 DBSetDir(dbfile, siloPath);
773 }
774
775 if (!dbfile)
776 throw SpeckleyException("dump: Could not create Silo file");
777
778 /*
779 if (driver==DB_HDF5) {
780 // gzip level 1 already provides good compression with minimal
781 // performance penalty. Some tests showed that gzip levels >3 performed
782 // rather badly on escript data both in terms of time and space
783 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
784 }
785 */
786
787 const dim_t NN0 = m_NN[0];
788 const dim_t NN1 = m_NN[1];
789 boost::scoped_ptr<double> x(new double[NN0]);
790 boost::scoped_ptr<double> y(new double[NN1]);
791 double* coords[2] = { x.get(), y.get() };
792 #pragma omp parallel
793 {
794 #pragma omp for nowait
795 for (dim_t i0 = 0; i0 < NN0; i0++) {
796 coords[0][i0]=getLocalCoordinate(i0, 0);
797 }
798 #pragma omp for nowait
799 for (dim_t i1 = 0; i1 < NN1; i1++) {
800 coords[1][i1]=getLocalCoordinate(i1, 1);
801 }
802 }
803 std::vector<int> dims(m_NN, m_NN+2);
804
805 // write mesh
806 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
807 DB_COLLINEAR, NULL);
808
809 // write node ids
810 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
811 NULL, 0, DB_INT, DB_NODECENT, NULL);
812
813 // write element ids
814 dims.assign(m_NE, m_NE+2);
815 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
816 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
817
818 // rank 0 writes multimesh and multivar
819 if (m_mpiInfo->rank == 0) {
820 std::vector<std::string> tempstrings;
821 std::vector<char*> names;
822 for (dim_t i=0; i<m_mpiInfo->size; i++) {
823 std::stringstream path;
824 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
825 tempstrings.push_back(path.str());
826 names.push_back((char*)tempstrings.back().c_str());
827 }
828 std::vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
829 DBSetDir(dbfile, "/");
830 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
831 &types[0], NULL);
832 tempstrings.clear();
833 names.clear();
834 for (dim_t i=0; i<m_mpiInfo->size; i++) {
835 std::stringstream path;
836 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
837 tempstrings.push_back(path.str());
838 names.push_back((char*)tempstrings.back().c_str());
839 }
840 types.assign(m_mpiInfo->size, DB_QUADVAR);
841 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
842 &types[0], NULL);
843 tempstrings.clear();
844 names.clear();
845 for (dim_t i=0; i<m_mpiInfo->size; i++) {
846 std::stringstream path;
847 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
848 tempstrings.push_back(path.str());
849 names.push_back((char*)tempstrings.back().c_str());
850 }
851 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
852 &types[0], NULL);
853 }
854
855 if (m_mpiInfo->size > 1) {
856 #ifdef ESYS_MPI
857 PMPIO_HandOffBaton(baton, dbfile);
858 PMPIO_Finish(baton);
859 #endif
860 } else {
861 DBClose(dbfile);
862 }
863
864 #else // USE_SILO
865 throw SpeckleyException("dump: no Silo support");
866 #endif
867 }
868
869 const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
870 {
871 switch (fsType) {
872 case DegreesOfFreedom:
873 case Nodes:
874 return &m_nodeId[0];
875 case Elements:
876 case ReducedElements:
877 return &m_elementId[0];
878 case Points:
879 return &m_diracPointNodeIDs[0];
880 default:
881 break;
882 }
883
884 std::stringstream msg;
885 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
886 throw SpeckleyException(msg.str());
887 }
888
889 bool Rectangle::ownSample(int fsType, index_t id) const
890 {
891 throw SpeckleyException("ownSample not implemented");
892 }
893
894 void Rectangle::setToNormal(escript::Data& out) const
895 {
896 throw SpeckleyException("setToNormal not implemented");
897 }
898
899 void Rectangle::setToSize(escript::Data& out) const
900 {
901 if (out.getFunctionSpace().getTypeCode() == Elements) {
902 out.requireWrite();
903 const dim_t numQuad = m_order + 1;
904 const dim_t numElements = getNumElements();
905 const double *quad_locs = point_locations[m_order-2];
906 //since elements are uniform, calc the first and copy to others
907 double* first_element = out.getSampleDataRW(0);
908 #pragma omp parallel for
909 for (short qy = 0; qy < m_order; qy++) {
910 const double y = quad_locs[qy+1] - quad_locs[qy];
911 for (short qx = 0; qx < m_order; qx++) {
912 const double x = quad_locs[qx+1] - quad_locs[qx];
913 first_element[qx + qy*numQuad] = sqrt(x*x + y*y);
914 }
915 }
916 const short top_start = (numQuad - 1)*numQuad;
917 for (short edge = 0; edge < m_order; edge++) {
918 //right edge = left edge
919 first_element[(edge+1)*numQuad - 1] = first_element[edge*numQuad];
920 //top edge = bottom edge
921 first_element[top_start + edge] = first_element[edge];
922 }
923 //top-right corner
924 first_element[numQuad*numQuad - 1] = first_element[0];
925 const size_t size = numQuad*numQuad*sizeof(double);
926 #pragma omp parallel for
927 for (index_t k = 0; k < numElements; ++k) {
928 double* o = out.getSampleDataRW(k);
929 memcpy(o, first_element, size);
930 }
931 } else {
932 std::stringstream msg;
933 msg << "setToSize: invalid function space type "
934 << out.getFunctionSpace().getTypeCode();
935 throw SpeckleyException(msg.str());
936 }
937 }
938
939 void Rectangle::Print_Mesh_Info(const bool full) const
940 {
941 SpeckleyDomain::Print_Mesh_Info(full);
942 if (full) {
943 std::cout << " Id Coordinates" << std::endl;
944 std::cout.precision(15);
945 std::cout.setf(std::ios::scientific, std::ios::floatfield);
946 for (index_t i=0; i < getNumNodes(); i++) {
947 std::cout << " " << std::setw(5) << m_nodeId[i]
948 << " " << getLocalCoordinate(i%m_NN[0], 0)
949 << " " << getLocalCoordinate(i/m_NN[0], 1) << std::endl;
950 }
951 }
952 }
953
954
955 //protected
956 void Rectangle::assembleCoordinates(escript::Data& arg) const
957 {
958 int numDim = m_numDim;
959 if (!arg.isDataPointShapeEqual(1, &numDim))
960 throw SpeckleyException("setToX: Invalid Data object shape");
961 if (!arg.numSamplesEqual(1, getNumNodes()))
962 throw SpeckleyException("setToX: Illegal number of samples in Data object");
963
964 const dim_t NN0 = m_NN[0];
965 const dim_t NN1 = m_NN[1];
966 arg.requireWrite();
967 #pragma omp parallel for
968 for (dim_t y = 0; y < NN1; y++) {
969 for (dim_t x = 0; x < NN0; x++) {
970 double *point = arg.getSampleDataRW(y*NN0 + x);
971 point[0] = getLocalCoordinate(x, 0);
972 point[1] = getLocalCoordinate(y, 1);
973 }
974 }
975 }
976
977 //protected
978 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
979 {
980 escript::Data converted;
981
982 if (in.getFunctionSpace().getTypeCode() != Elements) {
983 converted = escript::Data(in, escript::function(*this));
984 } else {
985 converted = in;
986 }
987
988 if (m_order == 2) {
989 gradient_order2(out,converted);
990 } else if (m_order == 3) {
991 gradient_order3(out,converted);
992 } else if (m_order == 4) {
993 gradient_order4(out,converted);
994 } else if (m_order == 5) {
995 gradient_order5(out,converted);
996 } else if (m_order == 6) {
997 gradient_order6(out,converted);
998 } else if (m_order == 7) {
999 gradient_order7(out,converted);
1000 } else if (m_order == 8) {
1001 gradient_order8(out,converted);
1002 } else if (m_order == 9) {
1003 gradient_order9(out,converted);
1004 } else if (m_order == 10) {
1005 gradient_order10(out,converted);
1006 }
1007 }
1008
1009 //protected
1010 void Rectangle::assembleIntegrate(std::vector<double>& integrals,
1011 const escript::Data& arg) const
1012 {
1013 const int fs = arg.getFunctionSpace().getTypeCode();
1014 if (fs != Elements)
1015 throw new SpeckleyException("Speckley doesn't currently support integrals of non-Element functionspaces");
1016 if (!arg.actsExpanded())
1017 throw new SpeckleyException("Speckley doesn't currently support unexpanded data");
1018
1019 if (m_order == 2) {
1020 integral_order2(integrals, arg);
1021 } else if (m_order == 3) {
1022 integral_order3(integrals, arg);
1023 } else if (m_order == 4) {
1024 integral_order4(integrals, arg);
1025 } else if (m_order == 5) {
1026 integral_order5(integrals, arg);
1027 } else if (m_order == 6) {
1028 integral_order6(integrals, arg);
1029 } else if (m_order == 7) {
1030 integral_order7(integrals, arg);
1031 } else if (m_order == 8) {
1032 integral_order8(integrals, arg);
1033 } else if (m_order == 9) {
1034 integral_order9(integrals, arg);
1035 } else if (m_order == 10) {
1036 integral_order10(integrals, arg);
1037 }
1038 }
1039
1040 /* This is a wrapper for filtered (and non-filtered) randoms
1041 * For detailed doco see randomFillWorker
1042 */
1043 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
1044 const escript::FunctionSpace& fs,
1045 long seed, const boost::python::tuple& filter) const {
1046 int numvals=escript::DataTypes::noValues(shape);
1047 int per_element = (m_order+1)*(m_order+1)*numvals;
1048 if (len(filter)>0) {
1049 throw SpeckleyException("Speckley does not support filters.");
1050 }
1051
1052 double* src=new double[m_NE[0]*m_NE[1]*per_element*numvals];
1053 escript::randomFillArray(seed, src, m_NE[0]*m_NE[1]*per_element);
1054 escript::Data res(0, shape, escript::function(*this), true);
1055 int current = 0;
1056 for (int ei = 0; ei < m_NE[1]; ++ei) {
1057 for (int ej = 0; ej < m_NE[0]; ++ej) {
1058 double *e = res.getSampleDataRW(INDEX2(ej,ei,m_NE[0]));
1059 memcpy(e, &src[current], sizeof(double)*per_element);
1060 current += per_element;
1061 }
1062 }
1063 if (res.getFunctionSpace() != fs) {
1064 return escript::Data(res, fs);
1065 }
1066 return res;
1067 }
1068
1069 //private
1070 void Rectangle::populateSampleIds()
1071 {
1072 // degrees of freedom are numbered from left to right, bottom to top in
1073 // each rank, continuing on the next rank (ranks also go left-right,
1074 // bottom-top).
1075 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1076 // helps when writing out data rank after rank.
1077
1078 // build node distribution vector first.
1079 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1080 // constant for all ranks in this implementation
1081 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1082 const index_t left = (m_offset[0]==0 ? 0 : 1);
1083 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1084 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1085 index_t rank_left = (k-1)%m_NX[0] == 0 ? 0 : 1;
1086 index_t rank_bottom = (k-1)/m_NX[0] == 0 ? 0 : 1;
1087 m_nodeDistribution[k] = m_nodeDistribution[k-1]
1088 + (m_NN[0]-rank_left)*(m_NN[1]-rank_bottom);
1089 }
1090 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1091 try {
1092 m_nodeId.resize(getNumNodes());
1093 m_elementId.resize(getNumElements());
1094 } catch (const std::length_error& le) {
1095 throw SpeckleyException("The system does not have sufficient memory for a domain of this size.");
1096 }
1097
1098 // populate face element counts
1099 //left
1100 if (m_offset[0]==0)
1101 m_faceCount[0]=m_NE[1];
1102 else
1103 m_faceCount[0]=0;
1104 //right
1105 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1106 m_faceCount[1]=m_NE[1];
1107 else
1108 m_faceCount[1]=0;
1109 //bottom
1110 if (m_offset[1]==0)
1111 m_faceCount[2]=m_NE[0];
1112 else
1113 m_faceCount[2]=0;
1114 //top
1115 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1116 m_faceCount[3]=m_NE[0];
1117 else
1118 m_faceCount[3]=0;
1119
1120
1121 if (bottom && left) {
1122 //get lower-left node
1123 m_nodeId[0] = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]] - 1;
1124 }
1125 if (bottom) {
1126 //DOF size of the neighbouring rank
1127 index_t rankDOF = m_nodeDistribution[m_mpiInfo->rank - m_NX[0] + 1]
1128 - m_nodeDistribution[m_mpiInfo->rank - m_NX[0]];
1129 //beginning of last row of neighbouring rank
1130 index_t begin = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]]
1131 + rankDOF - m_NN[0];
1132 for (index_t x = left; x < m_NN[0]; x++) {
1133 m_nodeId[x] = begin + x;
1134 }
1135 }
1136 if (left) {
1137 //is the rank to the left itself right of another rank
1138 index_t rank_left = (m_mpiInfo->rank - 1) % m_NX[0] == 0 ? 0 : 1;
1139 //end of first owned row of neighbouring rank
1140 index_t end = m_nodeDistribution[m_mpiInfo->rank - 1]
1141 + m_NN[0] - rank_left - 1;
1142 for (index_t y = bottom; y < m_NN[1]; y++) {
1143 m_nodeId[y*m_NN[0]] = end + (y-bottom)*(m_NN[0]-rank_left);
1144 }
1145 }
1146
1147 #pragma omp parallel for
1148 for (index_t y = bottom; y < m_NN[1]; y++) {
1149 for (index_t x = left; x < m_NN[0]; x++) {
1150 m_nodeId[y*m_NN[0]+x] = m_nodeDistribution[m_mpiInfo->rank] + (y-bottom)*(m_NN[0]-left) + (x-left);
1151 }
1152 }
1153
1154 m_nodeTags.assign(getNumNodes(), 0);
1155 updateTagsInUse(Nodes);
1156
1157 m_elementTags.assign(getNumElements(), 0);
1158 updateTagsInUse(Elements);
1159 }
1160
1161 //private
1162 void Rectangle::addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
1163 const std::vector<double>& EM_S, const std::vector<double>& EM_F, bool addS,
1164 bool addF, index_t firstNode, int nEq, int nComp) const
1165 {
1166 throw SpeckleyException("Rectangle::addToMatrixAndRHS, adding to matrix not supported");
1167 }
1168
1169 void Rectangle::reduceElements(escript::Data& out, const escript::Data& in) const
1170 {
1171 if (m_order == 2) {
1172 reduction_order2(in, out);
1173 } else if (m_order == 3) {
1174 reduction_order3(in, out);
1175 } else if (m_order == 4) {
1176 reduction_order4(in, out);
1177 } else if (m_order == 5) {
1178 reduction_order5(in, out);
1179 } else if (m_order == 6) {
1180 reduction_order6(in, out);
1181 } else if (m_order == 7) {
1182 reduction_order7(in, out);
1183 } else if (m_order == 8) {
1184 reduction_order8(in, out);
1185 } else if (m_order == 9) {
1186 reduction_order9(in, out);
1187 } else if (m_order == 10) {
1188 reduction_order10(in, out);
1189 }
1190 }
1191
1192 //protected
1193 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1194 const escript::Data& in,
1195 bool reduced) const
1196 {
1197 const dim_t numComp = in.getDataPointSize();
1198 const dim_t NE0 = m_NE[0];
1199 const dim_t NE1 = m_NE[1];
1200 const int quads = m_order + 1;
1201 const int max_x = m_NN[0];
1202 out.requireWrite();
1203 if (reduced) { //going to ReducedElements
1204 escript::Data funcIn(in, escript::function(*this));
1205 reduceElements(out, funcIn);
1206 return;
1207 }
1208 #pragma omp parallel for
1209 for (dim_t ey = 0; ey < NE1; ey++) {
1210 for (dim_t ex = 0; ex < NE0; ex++) {
1211 double *e_out = out.getSampleDataRW(ex + ey*NE0);
1212 dim_t start = ex*m_order + ey*max_x*m_order;
1213 int quad = 0;
1214 for (int qy = 0; qy < quads; qy++) {
1215 for (int qx = 0; qx < quads; qx++, quad++) {
1216 const double *n_in = in.getSampleDataRO(start + max_x*qy + qx);
1217 memcpy(e_out+quad*numComp, n_in, sizeof(double) * numComp);
1218 }
1219 }
1220 }
1221 }
1222 }
1223
1224 #ifdef ESYS_MPI
1225 //protected
1226 void Rectangle::balanceNeighbours(escript::Data& data, bool average) const {
1227 if (m_NX[0] * m_NX[1] == 1) {
1228 return;
1229 }
1230 const int numComp = data.getDataPointSize();
1231 const int rx = m_mpiInfo->rank % m_NX[0];
1232 const int ry = m_mpiInfo->rank / m_NX[0];
1233 //include bordering ranks in summation
1234 if (m_NX[1] != 1)
1235 shareVertical(data, rx, ry);
1236 if (m_NX[0] != 1)
1237 shareSides(data, rx, ry);
1238 if (m_NX[0] != 1 && m_NX[1] != 1) {
1239 shareCorners(data, rx, ry);
1240 if (!average)
1241 return;
1242 //averaging out corners
1243 // bottom left
1244 if (rx && ry) {
1245 double *values = data.getSampleDataRW(0);
1246 for (int comp = 0; comp < numComp; comp++) {
1247 values[comp] /= 2;
1248 }
1249 }
1250 // bottom right
1251 if (rx < (m_NX[0] - 1) && ry) {
1252 double *values = data.getSampleDataRW(m_NN[0]-1);
1253 for (int comp = 0; comp < numComp; comp++) {
1254 values[comp] /= 2;
1255 }
1256 }
1257 // top left
1258 if (rx && ry < (m_NX[0] - 1)) {
1259 double *values = data.getSampleDataRW((m_NN[1]-1)*m_NN[0]);
1260 for (int comp = 0; comp < numComp; comp++) {
1261 values[comp] /= 2;
1262 }
1263 }
1264 // top right
1265 if (rx < (m_NX[0] - 1) && ry < (m_NX[0] - 1)) {
1266 double *values = data.getSampleDataRW(m_NN[1]*m_NN[0] - 1);
1267 for (int comp = 0; comp < numComp; comp++) {
1268 values[comp] /= 2;
1269 }
1270 }
1271 }
1272 if (!average)
1273 return;
1274 // average shared-edges in x and y
1275 //left
1276 if (rx) {
1277 #pragma omp parallel for
1278 for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1279 double *values = data.getSampleDataRW(qy*m_NN[0]);
1280 for (int comp = 0; comp < numComp; comp++) {
1281 values[comp] /= 2;
1282 }
1283 }
1284 }
1285 //right
1286 if (rx < m_NX[0] - 1) {
1287 #pragma omp parallel for
1288 for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1289 double *values = data.getSampleDataRW(qy*m_NN[0] + m_NN[0] - 1);
1290 for (int comp = 0; comp < numComp; comp++) {
1291 values[comp] /= 2;
1292 }
1293 }
1294 }
1295 //bottom
1296 if (ry) {
1297 #pragma omp parallel for
1298 for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1299 double *values = data.getSampleDataRW(qx);
1300 for (int comp = 0; comp < numComp; comp++) {
1301 values[comp] /= 2;
1302 }
1303 }
1304 }
1305 //top
1306 if (ry < m_NX[1] - 1) {
1307 const dim_t start = (m_NN[1]-1)*m_NN[0];
1308 #pragma omp parallel for
1309 for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1310 double *values = data.getSampleDataRW(start + qx);
1311 for (int comp = 0; comp < numComp; comp++) {
1312 values[comp] /= 2;
1313 }
1314 }
1315 }
1316 }
1317 #endif //#ifdef ESYS_MPI
1318
1319 //protected
1320 void Rectangle::interpolateElementsOnNodes(escript::Data& out,
1321 const escript::Data& in) const {
1322 const dim_t numComp = in.getDataPointSize();
1323 const dim_t NE0 = m_NE[0];
1324 const dim_t NE1 = m_NE[1];
1325 const int quads = m_order + 1;
1326 const dim_t max_x = (m_order*NE0) + 1;
1327 const dim_t max_y = (m_order*NE1) + 1;
1328 out.requireWrite();
1329 const int inFS = in.getFunctionSpace().getTypeCode();
1330 // the summation portion
1331 if (inFS == ReducedElements) {
1332 for (dim_t colouring = 0; colouring < 2; colouring++) {
1333 #pragma omp parallel for
1334 for (dim_t ey = colouring; ey < NE1; ey += 2) {
1335 for (dim_t ex = 0; ex < NE0; ex++) {
1336 dim_t start = ex*m_order + ey*max_x*m_order;
1337 const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1338 for (int qy = 0; qy < quads; qy++) {
1339 for (int qx = 0; qx < quads; qx++) {
1340 double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1341 for (int comp = 0; comp < numComp; comp++) {
1342 n_out[comp] += e_in[comp];
1343 }
1344 }
1345 }
1346 }
1347 }
1348 }
1349 } else { //inFS == Elements
1350 for (dim_t colouring = 0; colouring < 2; colouring++) {
1351 #pragma omp parallel for
1352 for (dim_t ey = colouring; ey < NE1; ey += 2) {
1353 for (dim_t ex = 0; ex < NE0; ex++) {
1354 dim_t start = ex*m_order + ey*max_x*m_order;
1355 const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1356 for (int qy = 0; qy < quads; qy++) {
1357 for (int qx = 0; qx < quads; qx++) {
1358 double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1359 for (int comp = 0; comp < numComp; comp++) {
1360 n_out[comp] += e_in[INDEX3(comp, qx, qy, numComp, quads)];
1361 }
1362 }
1363 }
1364 }
1365 }
1366 }
1367 }
1368 #ifdef ESYS_MPI
1369 //share and average shared edges/corners
1370 balanceNeighbours(out, true);
1371 #endif
1372 // for every internal edge in x
1373 #pragma omp parallel for
1374 for (dim_t qy = 0; qy < max_y; qy++) {
1375 for (dim_t qx = m_order; qx < max_x - m_order; qx += m_order) {
1376 double *n_out = out.getSampleDataRW(qx + qy*max_x);
1377 for (int comp = 0; comp < numComp; comp++) {
1378 n_out[comp] /= 2;
1379 }
1380 }
1381 }
1382
1383 // for every internal edge in y
1384 const dim_t order = m_order;
1385 #pragma omp parallel for
1386 for (dim_t qy = order; qy < max_y - order; qy += order) {
1387 for (dim_t qx = 0; qx < max_x; qx ++) {
1388 double *n_out = out.getSampleDataRW(qx + qy*max_x);
1389 for (int comp = 0; comp < numComp; comp++) {
1390 n_out[comp] /= 2;
1391 }
1392 }
1393 }
1394 }
1395
1396 #ifdef ESYS_MPI
1397 //private
1398 void Rectangle::shareCorners(escript::Data& out, int rx, int ry) const
1399 {
1400 //setup
1401 const int tag = 0;
1402 MPI_Status status;
1403 MPI_Request request[4];
1404 const int numComp = out.getDataPointSize();
1405 const int count = 4 * numComp;
1406 std::vector<double> outbuf(count, 0);
1407 std::vector<double> inbuf(count, 0);
1408 const int rank = m_mpiInfo->rank;
1409 //precalc bounds so we can loop nicely, can probably be cleaned up
1410 const bool conds[4] = {rx && ry,
1411 rx < (m_NX[0] - 1) && ry,
1412 rx && ry < (m_NX[1] - 1),
1413 rx < (m_NX[0] - 1) && ry < (m_NX[1] - 1)};
1414 const int ranks[4] = {rank-m_NX[0]-1,
1415 rank-m_NX[0]+1,
1416 rank+m_NX[0]-1,
1417 rank+m_NX[0]+1};
1418 //fill everything, regardless of whether we're sharing that corner or not
1419 for (int y = 0; y < 2; y++) {
1420 for (int x = 0; x < 2; x++) {
1421 const double *data = out.getSampleDataRO(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1422 std::copy(data, data + numComp, &outbuf[(x + 2*y)*numComp]);
1423 }
1424 }
1425
1426 //share
1427 for (int i = 0; i < 4; i++) {
1428 if (conds[i]) {
1429 MPI_Isend(&outbuf[i], numComp, MPI_DOUBLE, ranks[i], tag,
1430 m_mpiInfo->comm, &request[i]);
1431 }
1432 }
1433
1434 //unpack
1435 for (int y = 0; y < 2; y++) {
1436 for (int x = 0; x < 2; x++) {
1437 int i = 2*y+x;
1438 if (conds[i]) {
1439 MPI_Recv(&inbuf[i], numComp, MPI_DOUBLE, ranks[i], tag,
1440 m_mpiInfo->comm, &status);
1441 double *data = out.getSampleDataRW(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1442 for (int comp = 0; comp < numComp; comp++) {
1443 data[comp] += inbuf[i*numComp + comp];
1444 }
1445 }
1446 }
1447 }
1448 for (int i = 0; i < 4; i++) {
1449 if (conds[i]) {
1450 MPI_Wait(request + i, &status);
1451 }
1452 }
1453 }
1454
1455 //private
1456 void Rectangle::shareVertical(escript::Data& out, int rx, int ry) const
1457 {
1458 const int tag = 0;
1459 MPI_Status status;
1460 const int numComp = out.getDataPointSize();
1461 const dim_t count = m_NN[0]*numComp;
1462 const int up_neighbour = m_mpiInfo->rank + m_NX[0];
1463 const int down_neighbour = m_mpiInfo->rank - m_NX[0];
1464 //allocate some space to recieve
1465 std::vector<double> recv(count);
1466 //get our sources
1467 double *top = out.getSampleDataRW((m_NN[1]-1) * m_NN[0]);
1468 double *bottom = out.getSampleDataRW(0);
1469
1470 MPI_Request request[2];
1471
1472 if (ry) {
1473 MPI_Isend(bottom, count, MPI_DOUBLE, down_neighbour, tag,
1474 m_mpiInfo->comm, request);
1475 }
1476
1477 if (ry < m_NX[1] - 1) {
1478 MPI_Isend(top, count, MPI_DOUBLE, up_neighbour, tag,
1479 m_mpiInfo->comm, request+1);
1480 }
1481
1482 //read down
1483 if (ry) {
1484 MPI_Recv(&recv[0], count, MPI_DOUBLE, down_neighbour, tag,
1485 m_mpiInfo->comm, &status);
1486
1487 //unpack bottom
1488 for (dim_t i = 0; i < count; i++) {
1489 bottom[i] += recv[i];
1490 }
1491 }
1492
1493 //read up, send up
1494 if (ry < m_NX[1] - 1) {
1495 MPI_Recv(&recv[0], count, MPI_DOUBLE, up_neighbour, tag,
1496 m_mpiInfo->comm, &status);
1497
1498 //unpack up
1499 for (dim_t i = 0; i < count; i++) {
1500 top[i] += recv[i];
1501 }
1502 }
1503
1504 if (ry) {
1505 MPI_Wait(request, &status);
1506 }
1507 if (ry < m_NX[1] - 1) {
1508 MPI_Wait(request+1, &status);
1509 }
1510 }
1511
1512 //private
1513 void Rectangle::shareSides(escript::Data& out, int rx, int ry) const
1514 {
1515 const int tag = 0;
1516 MPI_Status status;
1517 const int numComp = out.getDataPointSize();
1518 const dim_t count = m_NN[1]*numComp;
1519 const int left_neighbour = m_mpiInfo->rank - 1;
1520 const int right_neighbour = m_mpiInfo->rank + 1;
1521 //allocate some space
1522 std::vector<double> left(count,170);
1523 std::vector<double> right(count,17000);
1524 std::vector<double> recv(count,1700);
1525
1526 MPI_Request request[2];
1527
1528 if (rx) {
1529 for (dim_t n = 0; n < m_NN[1]; n++) {
1530 index_t index = n*m_NN[0];
1531 const double *leftData = out.getSampleDataRO(index);
1532 std::copy(leftData, leftData + numComp, &left[n*numComp]);
1533 }
1534 MPI_Isend(&left[0], count, MPI_DOUBLE, left_neighbour, tag,
1535 m_mpiInfo->comm, request);
1536 }
1537
1538 if (rx < m_NX[0] - 1) {
1539 for (dim_t n = 0; n < m_NN[1]; n++) {
1540 index_t index = n*m_NN[0];
1541 const double *rightData = out.getSampleDataRO(index+m_NN[0]-1);
1542 std::copy(rightData, rightData + numComp, &right[n*numComp]);
1543 }
1544 MPI_Isend(&right[0], count, MPI_DOUBLE, right_neighbour, tag,
1545 m_mpiInfo->comm, request+1);
1546 }
1547
1548 //read left
1549 if (rx) {
1550 MPI_Recv(&recv[0], count, MPI_DOUBLE, left_neighbour, tag,
1551 m_mpiInfo->comm, &status);
1552 //unpack to left
1553 for (dim_t i = 0; i < m_NN[1]; i++) {
1554 double *data = out.getSampleDataRW(i*m_NN[0]);
1555 for (int comp = 0; comp < numComp; comp++) {
1556 data[comp] += recv[i*numComp+comp];
1557 }
1558 }
1559 }
1560
1561 //read right
1562 if (rx < m_NX[0] - 1) {
1563 MPI_Recv(&recv[0], count, MPI_DOUBLE, right_neighbour, tag,
1564 m_mpiInfo->comm, &status);
1565 //unpack to right
1566 for (dim_t i = 0; i < m_NN[1]; i++) {
1567 double *data = out.getSampleDataRW((i + 1) * m_NN[0] - 1);
1568 for (int comp = 0; comp < numComp; comp++) {
1569 data[comp] += recv[i*numComp+comp];
1570 }
1571 }
1572 }
1573 if (rx) {
1574 MPI_Wait(request, &status);
1575 }
1576 if (rx < m_NX[0] - 1) {
1577 MPI_Wait(request+1, &status);
1578 }
1579 }
1580 #endif //#ifdef ESYS_MPI
1581
1582 dim_t Rectangle::findNode(const double *coords) const
1583 {
1584 const dim_t NOT_MINE = -1;
1585 //is the found element even owned by this rank
1586 for (int dim = 0; dim < m_numDim; dim++) {
1587 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
1588 - m_dx[dim]/2.; //allows for point outside mapping onto node
1589 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
1590 + m_dx[dim]/2.;
1591 if (min > coords[dim] || max < coords[dim]) {
1592 return NOT_MINE;
1593 }
1594 }
1595 // get distance from origin
1596 double x = coords[0] - m_origin[0];
1597 double y = coords[1] - m_origin[1];
1598
1599 //check if the point is even inside the domain
1600 if (x < 0 || y < 0 || x > m_length[0] || y > m_length[1])
1601 return NOT_MINE;
1602
1603 // trim to rank reference point
1604 x -= m_offset[0] * m_dx[0];
1605 y -= m_offset[1] * m_dx[1];
1606
1607 // distance in elements
1608 dim_t ex = (dim_t) floor((x + 0.01*m_dx[0]) / m_dx[0]);
1609 dim_t ey = (dim_t) floor((y + 0.01*m_dx[1]) / m_dx[1]);
1610 // set the min distance high enough to be outside the element plus a bit
1611 dim_t closest = NOT_MINE;
1612 double minDist = 1;
1613 for (int dim = 0; dim < m_numDim; dim++) {
1614 minDist += m_dx[dim]*m_dx[dim];
1615 }
1616 //find the closest node
1617 for (int dx = 0; dx < 2; dx++) {
1618 double xdist = x - (ex + dx)*m_dx[0];
1619 for (int dy = 0; dy < 2; dy++) {
1620 double ydist = y - (ey + dy)*m_dx[1];
1621 double total = xdist*xdist + ydist*ydist;
1622 if (total < minDist) {
1623 closest = (ex+dx)*m_order + (ey+dy)*m_order*m_NN[0];
1624 minDist = total;
1625 }
1626 }
1627 }
1628 //if this happens, we've let a dirac point slip through, which is awful
1629 if (closest == NOT_MINE) {
1630 throw SpeckleyException("Unable to map appropriate dirac point to a node,"
1631 " implementation problem in Rectangle::findNode()");
1632 }
1633 return closest;
1634 }
1635
1636 Assembler_ptr Rectangle::createAssembler(std::string type,
1637 const DataMap& options) const
1638 {
1639 if (type.compare("DefaultAssembler") == 0) {
1640 return Assembler_ptr(new DefaultAssembler2D(shared_from_this(), m_dx, m_NE, m_NN));
1641 } else if (type.compare("WaveAssembler") == 0) {
1642 return Assembler_ptr(new WaveAssembler2D(shared_from_this(), m_dx, m_NE, m_NN, options));
1643 }
1644 throw SpeckleyException("Speckley::Rectangle does not support the"
1645 " requested assembler");
1646 }
1647
1648 bool Rectangle::probeInterpolationAcross(int fsType_source,
1649 const escript::AbstractDomain& domain, int fsType_target) const
1650 {
1651 #ifdef USE_RIPLEY
1652 return speckley::probeInterpolationAcross(fsType_source, domain,
1653 fsType_target, 2);
1654 #else
1655 return false;
1656 #endif
1657 }
1658
1659 void Rectangle::interpolateAcross(escript::Data& target, const escript::Data& source) const
1660 {
1661 #ifdef USE_RIPLEY
1662 if (coupler == NULL) {
1663 coupler = new RipleyCoupler(this, m_dx, m_mpiInfo->rank);
1664 }
1665 coupler->interpolate(target, source);
1666 #else
1667 throw SpeckleyException("Speckley::Rectangle interpolation to unsupported domain");
1668 #endif
1669 }
1670
1671 } // end of namespace speckley
1672

  ViewVC Help
Powered by ViewVC 1.1.26