/[escript]/trunk/speckley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/speckley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5478 - (show annotations)
Wed Feb 18 01:57:49 2015 UTC (4 years, 7 months ago) by sshaw
File size: 59241 byte(s)
introducing shiny new reduced function spaces to speckley
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2015 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #define ESNEEDPYTHON
18 #include "esysUtils/first.h"
19
20
21 #include <algorithm>
22 #include <limits>
23
24 #include <speckley/Rectangle.h>
25 #include <esysUtils/esysFileWriter.h>
26 #include <esysUtils/index.h>
27 #include <esysUtils/Esys_MPI.h>
28 #include <speckley/DefaultAssembler2D.h>
29 #include <boost/scoped_array.hpp>
30 #include <escript/FunctionSpaceFactory.h>
31 #include "esysUtils/EsysRandom.h"
32
33 #ifdef USE_RIPLEY
34 #include <speckley/CrossDomainCoupler.h>
35 #endif
36
37 #ifdef USE_NETCDF
38 #include <netcdfcpp.h>
39 #endif
40
41 #if USE_SILO
42 #include <silo.h>
43 #ifdef ESYS_MPI
44 #include <pmpio.h>
45 #endif
46 #endif
47
48 #include <iomanip>
49
50 using esysUtils::FileWriter;
51 using namespace std; // to allow isnan to work
52 namespace speckley {
53
54 Rectangle::Rectangle(int order, dim_t n0, dim_t n1, double x0, double y0, double x1,
55 double y1, int d0, int d1,
56 const std::vector<double>& points,
57 const std::vector<int>& tags,
58 const TagMap& tagnamestonums,
59 escript::SubWorld_ptr w) :
60 SpeckleyDomain(2, order, w)
61 {
62 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
63 > std::numeric_limits<dim_t>::max())
64 throw SpeckleyException("The number of elements has overflowed, this "
65 "limit may be raised in future releases.");
66
67 if (n0 <= 0 || n1 <= 0)
68 throw SpeckleyException("Number of elements in each spatial dimension "
69 "must be positive");
70
71 // ignore subdivision parameters for serial run
72 if (m_mpiInfo->size == 1) {
73 d0=1;
74 d1=1;
75 }
76
77 bool warn=false;
78 std::vector<int> factors;
79 int ranks = m_mpiInfo->size;
80 dim_t epr[2] = {n0,n1};
81 int d[2] = {d0,d1};
82 if (d0<=0 || d1<=0) {
83 for (int i = 0; i < 2; i++) {
84 if (d[i] < 1) {
85 d[i] = 1;
86 continue;
87 }
88 epr[i] = -1; // can no longer be max
89 //remove
90 if (ranks % d[i] != 0) {
91 throw SpeckleyException("Invalid number of spatial subdivisions");
92 }
93 ranks /= d[i];
94 }
95 factorise(factors, ranks);
96 if (factors.size() != 0) {
97 warn = true;
98 }
99 }
100
101 while (factors.size() > 0) {
102 int i = epr[0] > epr[1] ? 0 : 1;
103 int f = factors.back();
104 factors.pop_back();
105 d[i] *= f;
106 epr[i] /= f;
107 }
108 d0 = d[0]; d1 = d[1];
109
110 // ensure number of subdivisions is valid and nodes can be distributed
111 // among number of ranks
112 if (d0*d1 != m_mpiInfo->size)
113 throw SpeckleyException("Invalid number of spatial subdivisions");
114
115 if (warn) {
116 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
117 << d1 << "). This may not be optimal!" << std::endl;
118 }
119
120 double l0 = x1-x0;
121 double l1 = y1-y0;
122 m_dx[0] = l0/n0;
123 m_dx[1] = l1/n1;
124
125 if (n0 % d0 > 0) {
126 n0 += d0 - (n0 % d0);
127 l0 = m_dx[0]*n0;
128 std::cout << "Warning: Adjusted number of elements and length. N0="
129 << n0 << ", l0=" << l0 << std::endl;
130 }
131 if (n1 % d1 > 0) {
132 n1 += d1 - (n1 % d1);
133 l1 = m_dx[1]*n1;
134 std::cout << "Warning: Adjusted number of elements and length. N1="
135 << n1 << ", l1=" << l1 << std::endl;
136 }
137
138 if (n0/d0 < 2 || n1/d1 < 2)
139 throw SpeckleyException("Too few elements for the number of ranks");
140
141 m_gNE[0] = n0;
142 m_gNE[1] = n1;
143 m_origin[0] = x0;
144 m_origin[1] = y0;
145 m_length[0] = l0;
146 m_length[1] = l1;
147 m_NX[0] = d0;
148 m_NX[1] = d1;
149
150 // local number of elements
151 m_NE[0] = m_gNE[0] / d0;
152 m_NE[1] = m_gNE[1] / d1;
153
154 // local number of nodes
155 m_NN[0] = m_NE[0]*m_order+1;
156 m_NN[1] = m_NE[1]*m_order+1;
157
158 // bottom-left node is at (offset0,offset1) in global mesh
159 m_offset[0] = n0/d0*(m_mpiInfo->rank%d0);
160 m_offset[1] = n1/d1*(m_mpiInfo->rank/d0);
161
162 populateSampleIds();
163
164 for (TagMap::const_iterator i = tagnamestonums.begin();
165 i != tagnamestonums.end(); i++) {
166 setTagMap(i->first, i->second);
167 }
168 addPoints(points, tags);
169
170
171 #ifdef USE_RIPLEY
172 coupler = NULL;
173 #endif
174 }
175
176 Rectangle::~Rectangle()
177 {
178 #ifdef USE_RIPLEY
179 if (coupler != NULL)
180 delete coupler;
181 #endif
182 }
183
184 std::string Rectangle::getDescription() const
185 {
186 return "speckley::Rectangle";
187 }
188
189 bool Rectangle::operator==(const AbstractDomain& other) const
190 {
191 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
192 if (o) {
193 return (SpeckleyDomain::operator==(other) &&
194 m_order == o->m_order &&
195 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
196 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
197 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
198 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
199 }
200
201 return false;
202 }
203
204 void Rectangle::readNcGrid(escript::Data& out, std::string filename,
205 std::string varname, const ReaderParameters& params) const
206 {
207 #ifdef USE_NETCDF
208 // check destination function space
209 dim_t myN0, myN1;
210 if (out.getFunctionSpace().getTypeCode() == Nodes) {
211 myN0 = m_NE[0] + 1;
212 myN1 = m_NE[1] + 1;
213 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
214 // myN0 = m_NE[0];
215 // myN1 = m_NE[1];
216 } else
217 throw SpeckleyException("readNcGrid(): invalid function space for output data object");
218
219 if (params.first.size() != 2)
220 throw SpeckleyException("readNcGrid(): argument 'first' must have 2 entries");
221
222 if (params.numValues.size() != 2)
223 throw SpeckleyException("readNcGrid(): argument 'numValues' must have 2 entries");
224
225 if (params.multiplier.size() != 2)
226 throw SpeckleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
227 for (size_t i=0; i<params.multiplier.size(); i++)
228 if (params.multiplier[i]<1)
229 throw SpeckleyException("readNcGrid(): all multipliers must be positive");
230 if (params.reverse.size() != 2)
231 throw SpeckleyException("readNcGrid(): argument 'reverse' must have 2 entries");
232
233 // check file existence and size
234 NcFile f(filename.c_str(), NcFile::ReadOnly);
235 if (!f.is_valid())
236 throw SpeckleyException("readNcGrid(): cannot open file");
237
238 NcVar* var = f.get_var(varname.c_str());
239 if (!var)
240 throw SpeckleyException("readNcGrid(): invalid variable");
241
242 // TODO: rank>0 data support
243 const int numComp = out.getDataPointSize();
244 if (numComp > 1)
245 throw SpeckleyException("readNcGrid(): only scalar data supported");
246
247 const int dims = var->num_dims();
248 boost::scoped_array<long> edges(var->edges());
249
250 // is this a slice of the data object (dims!=2)?
251 // note the expected ordering of edges (as in numpy: y,x)
252 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
253 || (dims==1 && params.numValues[1]>1) ) {
254 throw SpeckleyException("readNcGrid(): not enough data in file");
255 }
256
257 // check if this rank contributes anything
258 if (params.first[0] >= m_offset[0]+myN0 ||
259 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
260 params.first[1] >= m_offset[1]+myN1 ||
261 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
262 return;
263
264 // now determine how much this rank has to write
265
266 // first coordinates in data object to write to
267 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
268 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
269 // indices to first value in file (not accounting for reverse yet)
270 dim_t idx0 = std::max(dim_t(0), m_offset[0]-params.first[0]);
271 dim_t idx1 = std::max(dim_t(0), m_offset[1]-params.first[1]);
272 // number of values to read
273 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
274 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
275
276 // make sure we read the right block if going backwards through file
277 if (params.reverse[0])
278 idx0 = edges[dims-1]-num0-idx0;
279 if (dims>1 && params.reverse[1])
280 idx1 = edges[dims-2]-num1-idx1;
281
282 std::vector<double> values(num0*num1);
283 if (dims==2) {
284 var->set_cur(idx1, idx0);
285 var->get(&values[0], num1, num0);
286 } else {
287 var->set_cur(idx0);
288 var->get(&values[0], num0);
289 }
290
291 const int dpp = out.getNumDataPointsPerSample();
292 out.requireWrite();
293
294 // helpers for reversing
295 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
296 const int x_mult = (params.reverse[0] ? -1 : 1);
297 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
298 const int y_mult = (params.reverse[1] ? -1 : 1);
299
300 for (index_t y=0; y<num1; y++) {
301 #pragma omp parallel for
302 for (index_t x=0; x<num0; x++) {
303 const dim_t baseIndex = first0+x*params.multiplier[0]
304 +(first1+y*params.multiplier[1])*myN0;
305 const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
306 if (!isnan(values[srcIndex])) {
307 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
308 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
309 const dim_t dataIndex = baseIndex+m0+m1*myN0;
310 double* dest = out.getSampleDataRW(dataIndex);
311 for (index_t q=0; q<dpp; q++) {
312 *dest++ = values[srcIndex];
313 }
314 }
315 }
316 }
317 }
318 }
319 #else
320 throw SpeckleyException("readNcGrid(): not compiled with netCDF support");
321 #endif
322 }
323
324 void Rectangle::readBinaryGrid(escript::Data& out, std::string filename,
325 const ReaderParameters& params) const
326 {
327 // the mapping is not universally correct but should work on our
328 // supported platforms
329 switch (params.dataType) {
330 case DATATYPE_INT32:
331 readBinaryGridImpl<int>(out, filename, params);
332 break;
333 case DATATYPE_FLOAT32:
334 readBinaryGridImpl<float>(out, filename, params);
335 break;
336 case DATATYPE_FLOAT64:
337 readBinaryGridImpl<double>(out, filename, params);
338 break;
339 default:
340 throw SpeckleyException("readBinaryGrid(): invalid or unsupported datatype");
341 }
342 }
343
344 #ifdef USE_BOOSTIO
345 void Rectangle::readBinaryGridFromZipped(escript::Data& out, std::string filename,
346 const ReaderParameters& params) const
347 {
348 // the mapping is not universally correct but should work on our
349 // supported platforms
350 switch (params.dataType) {
351 case DATATYPE_INT32:
352 readBinaryGridZippedImpl<int>(out, filename, params);
353 break;
354 case DATATYPE_FLOAT32:
355 readBinaryGridZippedImpl<float>(out, filename, params);
356 break;
357 case DATATYPE_FLOAT64:
358 readBinaryGridZippedImpl<double>(out, filename, params);
359 break;
360 default:
361 throw SpeckleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
362 }
363 }
364 #endif
365
366 template<typename ValueType>
367 void Rectangle::readBinaryGridImpl(escript::Data& out, const std::string& filename,
368 const ReaderParameters& params) const
369 {
370 // check destination function space
371 dim_t myN0, myN1;
372 if (out.getFunctionSpace().getTypeCode() == Nodes) {
373 myN0 = m_NE[0] + 1;
374 myN1 = m_NE[1] + 1;
375 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
376 // myN0 = m_NE[0];
377 // myN1 = m_NE[1];
378 } else
379 throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
380
381 if (params.first.size() != 2)
382 throw SpeckleyException("readBinaryGrid(): argument 'first' must have 2 entries");
383
384 if (params.numValues.size() != 2)
385 throw SpeckleyException("readBinaryGrid(): argument 'numValues' must have 2 entries");
386
387 if (params.multiplier.size() != 2)
388 throw SpeckleyException("readBinaryGrid(): argument 'multiplier' must have 2 entries");
389 for (size_t i=0; i<params.multiplier.size(); i++)
390 if (params.multiplier[i]<1)
391 throw SpeckleyException("readBinaryGrid(): all multipliers must be positive");
392 if (params.reverse[0] != 0 || params.reverse[1] != 0)
393 throw SpeckleyException("readBinaryGrid(): reversing not supported yet");
394
395 // check file existence and size
396 std::ifstream f(filename.c_str(), std::ifstream::binary);
397 if (f.fail()) {
398 throw SpeckleyException("readBinaryGrid(): cannot open file");
399 }
400 f.seekg(0, std::ios::end);
401 const int numComp = out.getDataPointSize();
402 const dim_t filesize = f.tellg();
403 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
404 if (filesize < reqsize) {
405 f.close();
406 throw SpeckleyException("readBinaryGrid(): not enough data in file");
407 }
408
409 // check if this rank contributes anything
410 if (params.first[0] >= m_offset[0]+myN0 ||
411 params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
412 params.first[1] >= m_offset[1]+myN1 ||
413 params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
414 f.close();
415 return;
416 }
417
418 // now determine how much this rank has to write
419
420 // first coordinates in data object to write to
421 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
422 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
423 // indices to first value in file
424 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
425 const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
426 // if restX > 0 the first value in the respective dimension has been
427 // written restX times already in a previous rank so this rank only
428 // contributes (multiplier-rank) copies of that value
429 const dim_t rest0 = m_offset[0]%params.multiplier[0];
430 const dim_t rest1 = m_offset[1]%params.multiplier[1];
431 // number of values to read
432 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
433 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
434
435 out.requireWrite();
436 std::vector<ValueType> values(num0*numComp);
437 const int dpp = out.getNumDataPointsPerSample();
438
439 for (dim_t y=0; y<num1; y++) {
440 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
441 f.seekg(fileofs*sizeof(ValueType));
442 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
443 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
444 dim_t dataYbase = first1+y*params.multiplier[1];
445 if (y>0)
446 dataYbase -= rest1;
447 for (dim_t x=0; x<num0; x++) {
448 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
449 dim_t dataXbase = first0+x*params.multiplier[0];
450 if (x>0)
451 dataXbase -= rest0;
452 // write a block of mult0 x mult1 identical values into Data object
453 for (dim_t m1=0; m1<m1limit; m1++) {
454 const dim_t dataY = dataYbase+m1;
455 if (dataY >= myN1)
456 break;
457 for (dim_t m0=0; m0<m0limit; m0++) {
458 const dim_t dataX = dataXbase+m0;
459 if (dataX >= myN0)
460 break;
461 const dim_t dataIndex = dataX+dataY*m_NN[0];
462 double* dest = out.getSampleDataRW(dataIndex*m_order);
463 for (int c=0; c<numComp; c++) {
464 ValueType val = values[x*numComp+c];
465 if (params.byteOrder != BYTEORDER_NATIVE) {
466 char* cval = reinterpret_cast<char*>(&val);
467 // this will alter val!!
468 if (sizeof(ValueType) > 4) {
469 byte_swap64(cval);
470 } else {
471 byte_swap32(cval);
472 }
473 }
474 if (!isnan(val)) {
475 for (int q=0; q<dpp; q++) {
476 *dest++ = static_cast<double>(val);
477 }
478 }
479 }
480 }
481 }
482 }
483 }
484
485 f.close();
486 interpolateFromCorners(out);
487 }
488
489 #ifdef USE_BOOSTIO
490 template<typename ValueType>
491 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const std::string& filename,
492 const ReaderParameters& params) const
493 {
494 // check destination function space
495 dim_t myN0, myN1;
496 if (out.getFunctionSpace().getTypeCode() == Nodes) {
497 myN0 = m_NE[0] + 1;
498 myN1 = m_NE[1] + 1;
499 // } else if (out.getFunctionSpace().getTypeCode() == Elements) {
500 // myN0 = m_NE[0];
501 // myN1 = m_NE[1];
502 } else
503 throw SpeckleyException("readBinaryGrid(): invalid function space for output data object");
504
505 // check file existence and size
506 std::ifstream f(filename.c_str(), std::ifstream::binary);
507 if (f.fail()) {
508 throw SpeckleyException(strerror(errno));//"readBinaryGridFromZipped(): cannot open file");
509 }
510 f.seekg(0, std::ios::end);
511 const int numComp = out.getDataPointSize();
512 dim_t filesize = f.tellg();
513 f.seekg(0, std::ios::beg);
514 std::vector<char> compressed(filesize);
515 f.read((char*)&compressed[0], filesize);
516 f.close();
517 std::vector<char> decompressed = unzip(compressed);
518 filesize = decompressed.size();
519 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
520 if (filesize < reqsize) {
521 throw SpeckleyException("readBinaryGridFromZipped(): not enough data in file");
522 }
523
524 // check if this rank contributes anything
525 if (params.first[0] >= m_offset[0]+myN0 ||
526 params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
527 params.first[1] >= m_offset[1]+myN1 ||
528 params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
529 return;
530 }
531
532 // now determine how much this rank has to write
533
534 // first coordinates in data object to write to
535 const dim_t first0 = std::max(dim_t(0), params.first[0]-m_offset[0]);
536 const dim_t first1 = std::max(dim_t(0), params.first[1]-m_offset[1]);
537 // indices to first value in file
538 const dim_t idx0 = std::max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
539 const dim_t idx1 = std::max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
540 // if restX > 0 the first value in the respective dimension has been
541 // written restX times already in a previous rank so this rank only
542 // contributes (multiplier-rank) copies of that value
543 const dim_t rest0 = m_offset[0]%params.multiplier[0];
544 const dim_t rest1 = m_offset[1]%params.multiplier[1];
545 // number of values to read
546 const dim_t num0 = std::min(params.numValues[0]-idx0, myN0-first0);
547 const dim_t num1 = std::min(params.numValues[1]-idx1, myN1-first1);
548
549 out.requireWrite();
550 std::vector<ValueType> values(num0*numComp);
551 const int dpp = out.getNumDataPointsPerSample();
552
553 for (dim_t y=0; y<num1; y++) {
554 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
555 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
556 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
557 dim_t dataYbase = first1+y*params.multiplier[1];
558 if (y>0)
559 dataYbase -= rest1;
560 for (dim_t x=0; x<num0; x++) {
561 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
562 dim_t dataXbase = first0+x*params.multiplier[0];
563 if (x>0)
564 dataXbase -= rest0;
565 // write a block of mult0 x mult1 identical values into Data object
566 for (dim_t m1=0; m1<m1limit; m1++) {
567 const dim_t dataY = dataYbase+m1;
568 if (dataY >= myN1)
569 break;
570 for (dim_t m0=0; m0<m0limit; m0++) {
571 const dim_t dataX = dataXbase+m0;
572 if (dataX >= myN0)
573 break;
574 const dim_t dataIndex = dataX+dataY*m_NN[0];
575 double* dest = out.getSampleDataRW(dataIndex*m_order);
576 for (int c=0; c<numComp; c++) {
577 ValueType val = values[x*numComp+c];
578
579 if (params.byteOrder != BYTEORDER_NATIVE) {
580 char* cval = reinterpret_cast<char*>(&val);
581 // this will alter val!!
582 if (sizeof(ValueType) > 4) {
583 byte_swap64(cval);
584 } else {
585 byte_swap32(cval);
586 }
587 }
588 if (!isnan(val)) {
589 for (int q=0; q<dpp; q++) {
590 *dest++ = static_cast<double>(val);
591 }
592 }
593 }
594 }
595 }
596 }
597 }
598 interpolateFromCorners(out);
599 }
600 #endif
601
602 void Rectangle::interpolateFromCorners(escript::Data &out) const
603 {
604 const int numComp = out.getDataPointSize();
605 //interpolate the missing portions
606 #pragma omp parallel for
607 for (dim_t y = 0; y < m_NN[1]; y++) {
608 for (dim_t x = 0; x < m_NN[0]; x++) {
609 //skip the points we have values for
610 if (y % m_order == 0 && x % m_order == 0)
611 continue;
612 //point location in element: x,y
613 const double px = point_locations[m_order-2][x%m_order];
614 const double py = point_locations[m_order-2][y%m_order];
615
616 double *point = out.getSampleDataRW(INDEX2(x, y, m_NN[0]));
617
618 const dim_t left = x - x%m_order;
619 const dim_t right = left < m_NN[0] - 1 ? left + m_order : left;
620 const dim_t front = y - y%m_order;
621 const dim_t back = front < m_NN[1] - 1 ? front + m_order : front;
622
623
624 const double *lowleft = out.getSampleDataRO(
625 INDEX2(left, front, m_NN[0]));
626 const double *lowright = out.getSampleDataRO(
627 INDEX2(right, front, m_NN[0]));
628 const double *highleft = out.getSampleDataRO(
629 INDEX2(left, back, m_NN[0]));
630 const double *highright = out.getSampleDataRO(
631 INDEX2(right, back, m_NN[0]));
632
633 for (int comp = 0; comp < numComp; comp++) {
634 point[comp] = highright[comp]*px*py
635 + highleft[comp]*(1-px)*py
636 + lowright[comp]*px*(1-py)
637 + lowleft[comp]*(1-px)*(1-py);
638 }
639 }
640 }
641 }
642
643 void Rectangle::writeBinaryGrid(const escript::Data& in, std::string filename,
644 int byteOrder, int dataType) const
645 {
646 // the mapping is not universally correct but should work on our
647 // supported platforms
648 switch (dataType) {
649 case DATATYPE_INT32:
650 writeBinaryGridImpl<int>(in, filename, byteOrder);
651 break;
652 case DATATYPE_FLOAT32:
653 writeBinaryGridImpl<float>(in, filename, byteOrder);
654 break;
655 case DATATYPE_FLOAT64:
656 writeBinaryGridImpl<double>(in, filename, byteOrder);
657 break;
658 default:
659 throw SpeckleyException("writeBinaryGrid(): invalid or unsupported datatype");
660 }
661 }
662
663 template<typename ValueType>
664 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
665 const std::string& filename, int byteOrder) const
666 {
667 // check function space and determine number of points
668 dim_t myN0, myN1;
669 dim_t totalN0, totalN1;
670 if (in.getFunctionSpace().getTypeCode() == Nodes) {
671 myN0 = m_NE[0]+1;
672 myN1 = m_NE[1]+1;
673 totalN0 = m_gNE[0]+1;
674 totalN1 = m_gNE[1]+1;
675 // } else if (in.getFunctionSpace().getTypeCode() == Elements) {
676 // myN0 = m_NE[0];
677 // myN1 = m_NE[1];
678 // totalN0 = m_gNE[0];
679 // totalN1 = m_gNE[1];
680 } else
681 throw SpeckleyException("writeBinaryGrid(): invalid function space of data object");
682
683 const int numComp = in.getDataPointSize();
684 const int dpp = in.getNumDataPointsPerSample();
685
686 if (numComp > 1 || dpp > 1)
687 throw SpeckleyException("writeBinaryGrid(): only scalar, single-value data supported");
688
689 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
690
691 // from here on we know that each sample consists of one value
692 FileWriter fw;
693 fw.openFile(filename, fileSize);
694 MPIBarrier();
695
696 for (index_t y=0; y<myN1; y++) {
697 const dim_t fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
698 std::ostringstream oss;
699
700 for (index_t x=0; x<myN0; x++) {
701 const double* sample = in.getSampleDataRO((y*m_NN[0]+x)*m_order);
702 ValueType fvalue = static_cast<ValueType>(*sample);
703 if (byteOrder == BYTEORDER_NATIVE) {
704 oss.write((char*)&fvalue, sizeof(fvalue));
705 } else {
706 char* value = reinterpret_cast<char*>(&fvalue);
707 if (sizeof(fvalue)>4) {
708 byte_swap64(value);
709 } else {
710 byte_swap32(value);
711 }
712 oss.write(value, sizeof(fvalue));
713 }
714 }
715 fw.writeAt(oss, fileofs);
716 }
717 fw.close();
718 }
719
720 void Rectangle::write(const std::string& filename) const
721 {
722 throw SpeckleyException("write: not supported");
723 }
724
725 void Rectangle::dump(const std::string& fileName) const
726 {
727 #if USE_SILO
728 std::string fn(fileName);
729 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
730 fn+=".silo";
731 }
732
733 int driver=DB_HDF5;
734 DBfile* dbfile = NULL;
735 const char* blockDirFmt = "/block%04d";
736
737 #ifdef ESYS_MPI
738 PMPIO_baton_t* baton = NULL;
739 const int NUM_SILO_FILES = 1;
740 #endif
741
742 if (m_mpiInfo->size > 1) {
743 #ifdef ESYS_MPI
744 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
745 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
746 PMPIO_DefaultClose, (void*)&driver);
747 // try the fallback driver in case of error
748 if (!baton && driver != DB_PDB) {
749 driver = DB_PDB;
750 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
751 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
752 PMPIO_DefaultClose, (void*)&driver);
753 }
754 if (baton) {
755 char siloPath[64];
756 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
757 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
758 }
759 #endif
760 } else {
761 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
762 getDescription().c_str(), driver);
763 // try the fallback driver in case of error
764 if (!dbfile && driver != DB_PDB) {
765 driver = DB_PDB;
766 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
767 getDescription().c_str(), driver);
768 }
769 char siloPath[64];
770 snprintf(siloPath, 64, blockDirFmt, 0);
771 DBMkDir(dbfile, siloPath);
772 DBSetDir(dbfile, siloPath);
773 }
774
775 if (!dbfile)
776 throw SpeckleyException("dump: Could not create Silo file");
777
778 /*
779 if (driver==DB_HDF5) {
780 // gzip level 1 already provides good compression with minimal
781 // performance penalty. Some tests showed that gzip levels >3 performed
782 // rather badly on escript data both in terms of time and space
783 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
784 }
785 */
786
787 const dim_t NN0 = m_NN[0];
788 const dim_t NN1 = m_NN[1];
789 boost::scoped_ptr<double> x(new double[NN0]);
790 boost::scoped_ptr<double> y(new double[NN1]);
791 double* coords[2] = { x.get(), y.get() };
792 #pragma omp parallel
793 {
794 #pragma omp for nowait
795 for (dim_t i0 = 0; i0 < NN0; i0++) {
796 coords[0][i0]=getLocalCoordinate(i0, 0);
797 }
798 #pragma omp for nowait
799 for (dim_t i1 = 0; i1 < NN1; i1++) {
800 coords[1][i1]=getLocalCoordinate(i1, 1);
801 }
802 }
803 std::vector<int> dims(m_NN, m_NN+2);
804
805 // write mesh
806 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
807 DB_COLLINEAR, NULL);
808
809 // write node ids
810 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
811 NULL, 0, DB_INT, DB_NODECENT, NULL);
812
813 // write element ids
814 dims.assign(m_NE, m_NE+2);
815 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
816 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
817
818 // rank 0 writes multimesh and multivar
819 if (m_mpiInfo->rank == 0) {
820 std::vector<std::string> tempstrings;
821 std::vector<char*> names;
822 for (dim_t i=0; i<m_mpiInfo->size; i++) {
823 std::stringstream path;
824 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
825 tempstrings.push_back(path.str());
826 names.push_back((char*)tempstrings.back().c_str());
827 }
828 std::vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
829 DBSetDir(dbfile, "/");
830 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
831 &types[0], NULL);
832 tempstrings.clear();
833 names.clear();
834 for (dim_t i=0; i<m_mpiInfo->size; i++) {
835 std::stringstream path;
836 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
837 tempstrings.push_back(path.str());
838 names.push_back((char*)tempstrings.back().c_str());
839 }
840 types.assign(m_mpiInfo->size, DB_QUADVAR);
841 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
842 &types[0], NULL);
843 tempstrings.clear();
844 names.clear();
845 for (dim_t i=0; i<m_mpiInfo->size; i++) {
846 std::stringstream path;
847 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
848 tempstrings.push_back(path.str());
849 names.push_back((char*)tempstrings.back().c_str());
850 }
851 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
852 &types[0], NULL);
853 }
854
855 if (m_mpiInfo->size > 1) {
856 #ifdef ESYS_MPI
857 PMPIO_HandOffBaton(baton, dbfile);
858 PMPIO_Finish(baton);
859 #endif
860 } else {
861 DBClose(dbfile);
862 }
863
864 #else // USE_SILO
865 throw SpeckleyException("dump: no Silo support");
866 #endif
867 }
868
869 const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
870 {
871 switch (fsType) {
872 case DegreesOfFreedom:
873 case Nodes:
874 return &m_nodeId[0];
875 case Elements:
876 case ReducedElements:
877 return &m_elementId[0];
878 case Points:
879 return &m_diracPointNodeIDs[0];
880 default:
881 break;
882 }
883
884 std::stringstream msg;
885 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
886 throw SpeckleyException(msg.str());
887 }
888
889 bool Rectangle::ownSample(int fsType, index_t id) const
890 {
891 throw SpeckleyException("ownSample not implemented");
892 }
893
894 void Rectangle::setToNormal(escript::Data& out) const
895 {
896 throw SpeckleyException("setToNormal not implemented");
897 }
898
899 void Rectangle::setToSize(escript::Data& out) const
900 {
901 if (out.getFunctionSpace().getTypeCode() == Elements) {
902 out.requireWrite();
903 const dim_t numQuad = out.getNumDataPointsPerSample();
904 const dim_t numElements = getNumElements();
905 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
906 #pragma omp parallel for
907 for (index_t k = 0; k < numElements; ++k) {
908 double* o = out.getSampleDataRW(k);
909 std::fill(o, o+numQuad, size);
910 }
911 } else {
912 std::stringstream msg;
913 msg << "setToSize: invalid function space type "
914 << out.getFunctionSpace().getTypeCode();
915 throw SpeckleyException(msg.str());
916 }
917 }
918
919 void Rectangle::Print_Mesh_Info(const bool full) const
920 {
921 SpeckleyDomain::Print_Mesh_Info(full);
922 if (full) {
923 std::cout << " Id Coordinates" << std::endl;
924 std::cout.precision(15);
925 std::cout.setf(std::ios::scientific, std::ios::floatfield);
926 for (index_t i=0; i < getNumNodes(); i++) {
927 std::cout << " " << std::setw(5) << m_nodeId[i]
928 << " " << getLocalCoordinate(i%m_NN[0], 0)
929 << " " << getLocalCoordinate(i/m_NN[0], 1) << std::endl;
930 }
931 }
932 }
933
934
935 //protected
936 void Rectangle::assembleCoordinates(escript::Data& arg) const
937 {
938 escriptDataC x = arg.getDataC();
939 int numDim = m_numDim;
940 if (!isDataPointShapeEqual(&x, 1, &numDim))
941 throw SpeckleyException("setToX: Invalid Data object shape");
942 if (!numSamplesEqual(&x, 1, getNumNodes()))
943 throw SpeckleyException("setToX: Illegal number of samples in Data object");
944
945 const dim_t NN0 = m_NN[0];
946 const dim_t NN1 = m_NN[1];
947 arg.requireWrite();
948 #pragma omp parallel for
949 for (dim_t y = 0; y < NN1; y++) {
950 for (dim_t x = 0; x < NN0; x++) {
951 double *point = arg.getSampleDataRW(y*NN0 + x);
952 point[0] = getLocalCoordinate(x, 0);
953 point[1] = getLocalCoordinate(y, 1);
954 }
955 }
956 }
957
958 //protected
959 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
960 {
961 escript::Data converted;
962
963 if (in.getFunctionSpace().getTypeCode() != Elements) {
964 converted = escript::Data(in, escript::function(*this));
965 } else {
966 converted = in;
967 }
968
969 if (m_order == 2) {
970 gradient_order2(out,converted);
971 } else if (m_order == 3) {
972 gradient_order3(out,converted);
973 } else if (m_order == 4) {
974 gradient_order4(out,converted);
975 } else if (m_order == 5) {
976 gradient_order5(out,converted);
977 } else if (m_order == 6) {
978 gradient_order6(out,converted);
979 } else if (m_order == 7) {
980 gradient_order7(out,converted);
981 } else if (m_order == 8) {
982 gradient_order8(out,converted);
983 } else if (m_order == 9) {
984 gradient_order9(out,converted);
985 } else if (m_order == 10) {
986 gradient_order10(out,converted);
987 }
988 }
989
990 //protected
991 void Rectangle::assembleIntegrate(std::vector<double>& integrals,
992 const escript::Data& arg) const
993 {
994 const int fs = arg.getFunctionSpace().getTypeCode();
995 if (fs != Elements)
996 throw new SpeckleyException("Speckley doesn't currently support integrals of non-Element functionspaces");
997 if (!arg.actsExpanded())
998 throw new SpeckleyException("Speckley doesn't currently support unexpanded data");
999
1000 if (m_order == 2) {
1001 integral_order2(integrals, arg);
1002 } else if (m_order == 3) {
1003 integral_order3(integrals, arg);
1004 } else if (m_order == 4) {
1005 integral_order4(integrals, arg);
1006 } else if (m_order == 5) {
1007 integral_order5(integrals, arg);
1008 } else if (m_order == 6) {
1009 integral_order6(integrals, arg);
1010 } else if (m_order == 7) {
1011 integral_order7(integrals, arg);
1012 } else if (m_order == 8) {
1013 integral_order8(integrals, arg);
1014 } else if (m_order == 9) {
1015 integral_order9(integrals, arg);
1016 } else if (m_order == 10) {
1017 integral_order10(integrals, arg);
1018 }
1019 }
1020
1021 /* This is a wrapper for filtered (and non-filtered) randoms
1022 * For detailed doco see randomFillWorker
1023 */
1024 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
1025 const escript::FunctionSpace& fs,
1026 long seed, const boost::python::tuple& filter) const {
1027 int numvals=escript::DataTypes::noValues(shape);
1028 int per_element = (m_order+1)*(m_order+1)*numvals;
1029 if (len(filter)>0) {
1030 throw SpeckleyException("Speckley does not support filters.");
1031 }
1032
1033 double* src=new double[m_NE[0]*m_NE[1]*per_element*numvals];
1034 esysUtils::randomFillArray(seed, src, m_NE[0]*m_NE[1]*per_element);
1035 escript::Data res(0, shape, escript::function(*this), true);
1036 int current = 0;
1037 for (int ei = 0; ei < m_NE[1]; ++ei) {
1038 for (int ej = 0; ej < m_NE[0]; ++ej) {
1039 double *e = res.getSampleDataRW(INDEX2(ej,ei,m_NE[0]));
1040 memcpy(e, &src[current], sizeof(double)*per_element);
1041 current += per_element;
1042 }
1043 }
1044 if (res.getFunctionSpace() != fs) {
1045 return escript::Data(res, fs);
1046 }
1047 return res;
1048 }
1049
1050 //private
1051 void Rectangle::populateSampleIds()
1052 {
1053 // degrees of freedom are numbered from left to right, bottom to top in
1054 // each rank, continuing on the next rank (ranks also go left-right,
1055 // bottom-top).
1056 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1057 // helps when writing out data rank after rank.
1058
1059 // build node distribution vector first.
1060 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1061 // constant for all ranks in this implementation
1062 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1063 const index_t left = (m_offset[0]==0 ? 0 : 1);
1064 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1065 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1066 index_t rank_left = (k-1)%m_NX[0] == 0 ? 0 : 1;
1067 index_t rank_bottom = (k-1)/m_NX[0] == 0 ? 0 : 1;
1068 m_nodeDistribution[k] = m_nodeDistribution[k-1]
1069 + (m_NN[0]-rank_left)*(m_NN[1]-rank_bottom);
1070 }
1071 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1072 try {
1073 m_nodeId.resize(getNumNodes());
1074 m_elementId.resize(getNumElements());
1075 } catch (const std::length_error& le) {
1076 throw SpeckleyException("The system does not have sufficient memory for a domain of this size.");
1077 }
1078
1079 // populate face element counts
1080 //left
1081 if (m_offset[0]==0)
1082 m_faceCount[0]=m_NE[1];
1083 else
1084 m_faceCount[0]=0;
1085 //right
1086 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1087 m_faceCount[1]=m_NE[1];
1088 else
1089 m_faceCount[1]=0;
1090 //bottom
1091 if (m_offset[1]==0)
1092 m_faceCount[2]=m_NE[0];
1093 else
1094 m_faceCount[2]=0;
1095 //top
1096 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1097 m_faceCount[3]=m_NE[0];
1098 else
1099 m_faceCount[3]=0;
1100
1101
1102 if (bottom && left) {
1103 //get lower-left node
1104 m_nodeId[0] = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]] - 1;
1105 }
1106 if (bottom) {
1107 //DOF size of the neighbouring rank
1108 index_t rankDOF = m_nodeDistribution[m_mpiInfo->rank - m_NX[0] + 1]
1109 - m_nodeDistribution[m_mpiInfo->rank - m_NX[0]];
1110 //beginning of last row of neighbouring rank
1111 index_t begin = m_nodeDistribution[m_mpiInfo->rank - m_NX[0]]
1112 + rankDOF - m_NN[0];
1113 for (index_t x = left; x < m_NN[0]; x++) {
1114 m_nodeId[x] = begin + x;
1115 }
1116 }
1117 if (left) {
1118 //is the rank to the left itself right of another rank
1119 index_t rank_left = (m_mpiInfo->rank - 1) % m_NX[0] == 0 ? 0 : 1;
1120 //end of first owned row of neighbouring rank
1121 index_t end = m_nodeDistribution[m_mpiInfo->rank - 1]
1122 + m_NN[0] - rank_left - 1;
1123 for (index_t y = bottom; y < m_NN[1]; y++) {
1124 m_nodeId[y*m_NN[0]] = end + (y-bottom)*(m_NN[0]-rank_left);
1125 }
1126 }
1127
1128 #pragma omp parallel for
1129 for (index_t y = bottom; y < m_NN[1]; y++) {
1130 for (index_t x = left; x < m_NN[0]; x++) {
1131 m_nodeId[y*m_NN[0]+x] = m_nodeDistribution[m_mpiInfo->rank] + (y-bottom)*(m_NN[0]-left) + (x-left);
1132 }
1133 }
1134
1135 m_nodeTags.assign(getNumNodes(), 0);
1136 updateTagsInUse(Nodes);
1137
1138 m_elementTags.assign(getNumElements(), 0);
1139 updateTagsInUse(Elements);
1140 }
1141
1142 //private
1143 void Rectangle::addToMatrixAndRHS(escript::AbstractSystemMatrix* S, escript::Data& F,
1144 const std::vector<double>& EM_S, const std::vector<double>& EM_F, bool addS,
1145 bool addF, index_t firstNode, int nEq, int nComp) const
1146 {
1147 throw SpeckleyException("Rectangle::addToMatrixAndRHS, adding to matrix not supported");
1148 }
1149
1150 void Rectangle::reduceElements(escript::Data& out, const escript::Data& in) const
1151 {
1152 if (m_order == 2) {
1153 reduction_order2(in, out);
1154 } else if (m_order == 3) {
1155 reduction_order3(in, out);
1156 } else if (m_order == 4) {
1157 reduction_order4(in, out);
1158 } else if (m_order == 5) {
1159 reduction_order5(in, out);
1160 } else if (m_order == 6) {
1161 reduction_order6(in, out);
1162 } else if (m_order == 7) {
1163 reduction_order7(in, out);
1164 } else if (m_order == 8) {
1165 reduction_order8(in, out);
1166 } else if (m_order == 9) {
1167 reduction_order9(in, out);
1168 } else if (m_order == 10) {
1169 reduction_order10(in, out);
1170 }
1171 }
1172
1173 //protected
1174 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1175 const escript::Data& in,
1176 bool reduced) const
1177 {
1178 const dim_t numComp = in.getDataPointSize();
1179 const dim_t NE0 = m_NE[0];
1180 const dim_t NE1 = m_NE[1];
1181 const int quads = m_order + 1;
1182 const int max_x = m_NN[0];
1183 out.requireWrite();
1184 if (reduced) { //going to ReducedElements
1185 escript::Data funcIn(in, escript::function(*this));
1186 reduceElements(out, funcIn);
1187 return;
1188 }
1189 #pragma omp parallel for
1190 for (dim_t ey = 0; ey < NE1; ey++) {
1191 for (dim_t ex = 0; ex < NE0; ex++) {
1192 double *e_out = out.getSampleDataRW(ex + ey*NE0);
1193 dim_t start = ex*m_order + ey*max_x*m_order;
1194 int quad = 0;
1195 for (int qy = 0; qy < quads; qy++) {
1196 for (int qx = 0; qx < quads; qx++, quad++) {
1197 const double *n_in = in.getSampleDataRO(start + max_x*qy + qx);
1198 memcpy(e_out+quad*numComp, n_in, sizeof(double) * numComp);
1199 }
1200 }
1201 }
1202 }
1203 }
1204
1205 #ifdef ESYS_MPI
1206 //protected
1207 void Rectangle::balanceNeighbours(escript::Data& data, bool average) const {
1208 if (m_NX[0] * m_NX[1] == 1) {
1209 return;
1210 }
1211 const int numComp = data.getDataPointSize();
1212 const int rx = m_mpiInfo->rank % m_NX[0];
1213 const int ry = m_mpiInfo->rank / m_NX[0];
1214 //include bordering ranks in summation
1215 if (m_NX[1] != 1)
1216 shareVertical(data, rx, ry);
1217 if (m_NX[0] != 1)
1218 shareSides(data, rx, ry);
1219 if (m_NX[0] != 1 && m_NX[1] != 1) {
1220 shareCorners(data, rx, ry);
1221 if (!average)
1222 return;
1223 //averaging out corners
1224 // bottom left
1225 if (rx && ry) {
1226 double *values = data.getSampleDataRW(0);
1227 for (int comp = 0; comp < numComp; comp++) {
1228 values[comp] /= 2;
1229 }
1230 }
1231 // bottom right
1232 if (rx < (m_NX[0] - 1) && ry) {
1233 double *values = data.getSampleDataRW(m_NN[0]-1);
1234 for (int comp = 0; comp < numComp; comp++) {
1235 values[comp] /= 2;
1236 }
1237 }
1238 // top left
1239 if (rx && ry < (m_NX[0] - 1)) {
1240 double *values = data.getSampleDataRW((m_NN[1]-1)*m_NN[0]);
1241 for (int comp = 0; comp < numComp; comp++) {
1242 values[comp] /= 2;
1243 }
1244 }
1245 // top right
1246 if (rx < (m_NX[0] - 1) && ry < (m_NX[0] - 1)) {
1247 double *values = data.getSampleDataRW(m_NN[1]*m_NN[0] - 1);
1248 for (int comp = 0; comp < numComp; comp++) {
1249 values[comp] /= 2;
1250 }
1251 }
1252 }
1253 if (!average)
1254 return;
1255 // average shared-edges in x and y
1256 //left
1257 if (rx) {
1258 #pragma omp parallel for
1259 for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1260 double *values = data.getSampleDataRW(qy*m_NN[0]);
1261 for (int comp = 0; comp < numComp; comp++) {
1262 values[comp] /= 2;
1263 }
1264 }
1265 }
1266 //right
1267 if (rx < m_NX[0] - 1) {
1268 #pragma omp parallel for
1269 for (dim_t qy = 0; qy < m_NN[1]; qy++) {
1270 double *values = data.getSampleDataRW(qy*m_NN[0] + m_NN[0] - 1);
1271 for (int comp = 0; comp < numComp; comp++) {
1272 values[comp] /= 2;
1273 }
1274 }
1275 }
1276 //bottom
1277 if (ry) {
1278 #pragma omp parallel for
1279 for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1280 double *values = data.getSampleDataRW(qx);
1281 for (int comp = 0; comp < numComp; comp++) {
1282 values[comp] /= 2;
1283 }
1284 }
1285 }
1286 //top
1287 if (ry < m_NX[1] - 1) {
1288 const dim_t start = (m_NN[1]-1)*m_NN[0];
1289 #pragma omp parallel for
1290 for (dim_t qx = 0; qx < m_NN[0]; qx++) {
1291 double *values = data.getSampleDataRW(start + qx);
1292 for (int comp = 0; comp < numComp; comp++) {
1293 values[comp] /= 2;
1294 }
1295 }
1296 }
1297 }
1298 #endif //#ifdef ESYS_MPI
1299
1300 //protected
1301 void Rectangle::interpolateElementsOnNodes(escript::Data& out,
1302 const escript::Data& in) const {
1303 const dim_t numComp = in.getDataPointSize();
1304 const dim_t NE0 = m_NE[0];
1305 const dim_t NE1 = m_NE[1];
1306 const int quads = m_order + 1;
1307 const dim_t max_x = (m_order*NE0) + 1;
1308 const dim_t max_y = (m_order*NE1) + 1;
1309 out.requireWrite();
1310 const int inFS = in.getFunctionSpace().getTypeCode();
1311 // the summation portion
1312 if (inFS == ReducedElements) {
1313 for (dim_t colouring = 0; colouring < 2; colouring++) {
1314 #pragma omp parallel for
1315 for (dim_t ey = colouring; ey < NE1; ey += 2) {
1316 for (dim_t ex = 0; ex < NE0; ex++) {
1317 dim_t start = ex*m_order + ey*max_x*m_order;
1318 const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1319 for (int qy = 0; qy < quads; qy++) {
1320 for (int qx = 0; qx < quads; qx++) {
1321 double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1322 for (int comp = 0; comp < numComp; comp++) {
1323 n_out[comp] += e_in[comp];
1324 }
1325 }
1326 }
1327 }
1328 }
1329 }
1330 } else { //inFS == Elements
1331 for (dim_t colouring = 0; colouring < 2; colouring++) {
1332 #pragma omp parallel for
1333 for (dim_t ey = colouring; ey < NE1; ey += 2) {
1334 for (dim_t ex = 0; ex < NE0; ex++) {
1335 dim_t start = ex*m_order + ey*max_x*m_order;
1336 const double *e_in = in.getSampleDataRO(ex + ey*NE0);
1337 for (int qy = 0; qy < quads; qy++) {
1338 for (int qx = 0; qx < quads; qx++) {
1339 double *n_out = out.getSampleDataRW(start + max_x*qy + qx);
1340 for (int comp = 0; comp < numComp; comp++) {
1341 n_out[comp] += e_in[INDEX3(comp, qx, qy, numComp, quads)];
1342 }
1343 }
1344 }
1345 }
1346 }
1347 }
1348 }
1349 #ifdef ESYS_MPI
1350 //share and average shared edges/corners
1351 balanceNeighbours(out, true);
1352 #endif
1353 // for every internal edge in x
1354 #pragma omp parallel for
1355 for (dim_t qy = 0; qy < max_y; qy++) {
1356 for (dim_t qx = m_order; qx < max_x - m_order; qx += m_order) {
1357 double *n_out = out.getSampleDataRW(qx + qy*max_x);
1358 for (int comp = 0; comp < numComp; comp++) {
1359 n_out[comp] /= 2;
1360 }
1361 }
1362 }
1363
1364 // for every internal edge in y
1365 const dim_t order = m_order;
1366 #pragma omp parallel for
1367 for (dim_t qy = order; qy < max_y - order; qy += order) {
1368 for (dim_t qx = 0; qx < max_x; qx ++) {
1369 double *n_out = out.getSampleDataRW(qx + qy*max_x);
1370 for (int comp = 0; comp < numComp; comp++) {
1371 n_out[comp] /= 2;
1372 }
1373 }
1374 }
1375 }
1376
1377 #ifdef ESYS_MPI
1378 //private
1379 void Rectangle::shareCorners(escript::Data& out, int rx, int ry) const
1380 {
1381 //setup
1382 const int tag = 0;
1383 MPI_Status status;
1384 const int numComp = out.getDataPointSize();
1385 const int count = 4 * numComp;
1386 std::vector<double> outbuf(count, 0);
1387 std::vector<double> inbuf(count, 0);
1388 const int rank = m_mpiInfo->rank;
1389 //precalc bounds so we can loop nicely, can probably be cleaned up
1390 const bool conds[4] = {rx && ry, rx < (m_NX[0] - 1) && ry,
1391 rx && ry < (m_NX[1] - 1),rx < (m_NX[0] - 1) && ry < (m_NX[1] - 1)};
1392 const int ranks[4] = {rank-m_NX[0]-1,rank-m_NX[0]+1,
1393 rank+m_NX[0]-1,rank+m_NX[0]+1};
1394 //fill everything, regardless of whether we're sharing that corner or not
1395 for (int y = 0; y < 2; y++) {
1396 for (int x = 0; x < 2; x++) {
1397 const double *data = out.getSampleDataRO(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1398 std::copy(data, data + numComp, &outbuf[(x + 2*y)*numComp]);
1399 }
1400 }
1401
1402 //share
1403 for (int i = 0; i < 4; i++) {
1404 if (conds[i]) {
1405 MPI_Sendrecv(&outbuf[i], numComp, MPI_DOUBLE, ranks[i], tag,
1406 &inbuf[i], count, MPI_DOUBLE, ranks[i], tag,
1407 m_mpiInfo->comm, &status);
1408 }
1409 }
1410
1411 //unpack
1412 for (int y = 0; y < 2; y++) {
1413 for (int x = 0; x < 2; x++) {
1414 double *data = out.getSampleDataRW(x*(m_NN[0]-1) + y*(m_NN[1]-1)*m_NN[0]);
1415 for (int comp = 0; comp < numComp; comp++) {
1416 data[comp] += inbuf[(x + 2*y)*numComp + comp];
1417 }
1418 }
1419 }
1420 }
1421
1422 //private
1423 void Rectangle::shareVertical(escript::Data& out, int rx, int ry) const
1424 {
1425 const int tag = 0;
1426 MPI_Status status;
1427 const int numComp = out.getDataPointSize();
1428 const dim_t count = m_NN[0]*numComp;
1429 const int up_neighbour = m_mpiInfo->rank + m_NX[0];
1430 const int down_neighbour = m_mpiInfo->rank - m_NX[0];
1431 //allocate some space to recieve
1432 std::vector<double> recv(count);
1433 //get our sources
1434 double *top = out.getSampleDataRW((m_NN[1]-1) * m_NN[0]);
1435 double *bottom = out.getSampleDataRW(0);
1436
1437 if (ry % 2) {
1438 //send to up, read from up
1439 if (ry < m_NX[1] - 1) {
1440 MPI_Send(&top[0], count, MPI_DOUBLE, up_neighbour, tag,
1441 m_mpiInfo->comm);
1442 MPI_Recv(&recv[0], count, MPI_DOUBLE, up_neighbour, tag,
1443 m_mpiInfo->comm, &status);
1444 //unpack up
1445 for (dim_t i = 0; i < count; i++) {
1446 top[i] += recv[i];
1447 }
1448 }
1449 //send down, read down
1450 MPI_Send(&bottom[0], count, MPI_DOUBLE, down_neighbour, tag,
1451 m_mpiInfo->comm);
1452 MPI_Recv(&recv[0], count, MPI_DOUBLE, down_neighbour, tag,
1453 m_mpiInfo->comm, &status);
1454 //unpack bottom
1455 for (dim_t i = 0; i < count; i++) {
1456 bottom[i] += recv[i];
1457 }
1458 } else {
1459 //read down, send down
1460 if (ry) {
1461 MPI_Recv(&recv[0], count, MPI_DOUBLE, down_neighbour, tag,
1462 m_mpiInfo->comm, &status);
1463 MPI_Send(&bottom[0], count, MPI_DOUBLE, down_neighbour, tag,
1464 m_mpiInfo->comm);
1465 //unpack bottom
1466 for (dim_t i = 0; i < count; i++) {
1467 bottom[i] += recv[i];
1468 }
1469 }
1470
1471 //read up, send up
1472 if (ry < m_NX[1] - 1) {
1473 MPI_Recv(&recv[0], count, MPI_DOUBLE, up_neighbour, tag,
1474 m_mpiInfo->comm, &status);
1475 MPI_Send(&top[0], count, MPI_DOUBLE, up_neighbour, tag,
1476 m_mpiInfo->comm);
1477 //unpack up
1478 for (dim_t i = 0; i < count; i++) {
1479 top[i] += recv[i];
1480 }
1481 }
1482 }
1483 }
1484
1485 //private
1486 void Rectangle::shareSides(escript::Data& out, int rx, int ry) const
1487 {
1488 const int tag = 0;
1489 MPI_Status status;
1490 const int numComp = out.getDataPointSize();
1491 const dim_t count = m_NN[1]*numComp;
1492 const int left_neighbour = m_mpiInfo->rank - 1;
1493 const int right_neighbour = m_mpiInfo->rank + 1;
1494 //allocate some space
1495 std::vector<double> left(count,170);
1496 std::vector<double> right(count,17000);
1497 std::vector<double> recv(count,1700);
1498 //fill those values
1499 for (dim_t n = 0; n < m_NN[1]; n++) {
1500 index_t index = n*m_NN[0];
1501 const double *leftData = out.getSampleDataRO(index);
1502 std::copy(leftData, leftData + numComp, &left[n*numComp]);
1503 const double *rightData = out.getSampleDataRO(index+m_NN[0]-1);
1504 std::copy(rightData, rightData + numComp, &right[n*numComp]);
1505 }
1506
1507 if (rx % 2) {
1508 //send to right, read from right
1509 if (rx < m_NX[0] - 1) {
1510 MPI_Send(&right[0], count, MPI_DOUBLE, right_neighbour, tag,
1511 m_mpiInfo->comm);
1512 MPI_Recv(&recv[0], count, MPI_DOUBLE, right_neighbour, tag,
1513 m_mpiInfo->comm, &status);
1514 //unpack to right
1515 for (dim_t i = 0; i < m_NN[1]; i++) {
1516 double *data = out.getSampleDataRW((i + 1) * m_NN[0] - 1);
1517 for (int comp = 0; comp < numComp; comp++) {
1518 data[comp] += recv[i*numComp+comp];
1519 }
1520 }
1521 }
1522 //send left, read left
1523 MPI_Send(&left[0], count, MPI_DOUBLE, left_neighbour, tag,
1524 m_mpiInfo->comm);
1525 MPI_Recv(&recv[0], count, MPI_DOUBLE, left_neighbour, tag,
1526 m_mpiInfo->comm, &status);
1527 //unpack to left
1528 for (dim_t i = 0; i < m_NN[1]; i++) {
1529 double *data = out.getSampleDataRW(i*m_NN[0]);
1530 for (int comp = 0; comp < numComp; comp++) {
1531 data[comp] += recv[i*numComp+comp];
1532 }
1533 }
1534 } else {
1535 //read left, send left
1536 if (rx) {
1537 MPI_Recv(&recv[0], count, MPI_DOUBLE, left_neighbour, tag,
1538 m_mpiInfo->comm, &status);
1539 MPI_Send(&left[0], count, MPI_DOUBLE, left_neighbour, tag,
1540 m_mpiInfo->comm);
1541 //unpack to left
1542 for (dim_t i = 0; i < m_NN[1]; i++) {
1543 double *data = out.getSampleDataRW(i*m_NN[0]);
1544 for (int comp = 0; comp < numComp; comp++) {
1545 data[comp] += recv[i*numComp+comp];
1546 }
1547 }
1548 }
1549
1550 //read right, send right
1551 if (rx < m_NX[0] - 1) {
1552 MPI_Recv(&recv[0], count, MPI_DOUBLE, right_neighbour, tag,
1553 m_mpiInfo->comm, &status);
1554 MPI_Send(&right[0], count, MPI_DOUBLE, right_neighbour, tag,
1555 m_mpiInfo->comm);
1556 //unpack to right
1557 for (dim_t i = 0; i < m_NN[1]; i++) {
1558 double *data = out.getSampleDataRW((i + 1) * m_NN[0] - 1);
1559 for (int comp = 0; comp < numComp; comp++) {
1560 data[comp] += recv[i*numComp+comp];
1561 }
1562 }
1563 }
1564 }
1565 }
1566 #endif //#ifdef ESYS_MPI
1567
1568 dim_t Rectangle::findNode(const double *coords) const
1569 {
1570 const dim_t NOT_MINE = -1;
1571 //is the found element even owned by this rank
1572 for (int dim = 0; dim < m_numDim; dim++) {
1573 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
1574 - m_dx[dim]/2.; //allows for point outside mapping onto node
1575 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
1576 + m_dx[dim]/2.;
1577 if (min > coords[dim] || max < coords[dim]) {
1578 return NOT_MINE;
1579 }
1580 }
1581 // get distance from origin
1582 double x = coords[0] - m_origin[0];
1583 double y = coords[1] - m_origin[1];
1584
1585 //check if the point is even inside the domain
1586 if (x < 0 || y < 0 || x > m_length[0] || y > m_length[1])
1587 return NOT_MINE;
1588
1589 // trim to rank reference point
1590 x -= m_offset[0] * m_dx[0];
1591 y -= m_offset[1] * m_dx[1];
1592
1593 // distance in elements
1594 dim_t ex = (dim_t) floor((x + 0.01*m_dx[0]) / m_dx[0]);
1595 dim_t ey = (dim_t) floor((y + 0.01*m_dx[1]) / m_dx[1]);
1596 // set the min distance high enough to be outside the element plus a bit
1597 dim_t closest = NOT_MINE;
1598 double minDist = 1;
1599 for (int dim = 0; dim < m_numDim; dim++) {
1600 minDist += m_dx[dim]*m_dx[dim];
1601 }
1602 //find the closest node
1603 for (int dx = 0; dx < 2; dx++) {
1604 double xdist = x - (ex + dx)*m_dx[0];
1605 for (int dy = 0; dy < 2; dy++) {
1606 double ydist = y - (ey + dy)*m_dx[1];
1607 double total = xdist*xdist + ydist*ydist;
1608 if (total < minDist) {
1609 closest = (ex+dx)*m_order + (ey+dy)*m_order*m_NN[0];
1610 minDist = total;
1611 }
1612 }
1613 }
1614 //if this happens, we've let a dirac point slip through, which is awful
1615 if (closest == NOT_MINE) {
1616 throw SpeckleyException("Unable to map appropriate dirac point to a node,"
1617 " implementation problem in Rectangle::findNode()");
1618 }
1619 return closest;
1620 }
1621
1622 Assembler_ptr Rectangle::createAssembler(std::string type,
1623 const DataMap& options) const {
1624 if (type.compare("DefaultAssembler") == 0) {
1625 return Assembler_ptr(new DefaultAssembler2D(shared_from_this(), m_dx, m_NE, m_NN));
1626 }
1627 throw SpeckleyException("Speckley::Rectangle does not support the"
1628 " requested assembler");
1629 }
1630
1631 bool Rectangle::probeInterpolationAcross(int fsType_source,
1632 const escript::AbstractDomain& domain, int fsType_target) const
1633 {
1634 #ifdef USE_RIPLEY
1635 return speckley::probeInterpolationAcross(fsType_source, domain,
1636 fsType_target, 2);
1637 #else
1638 return false;
1639 #endif
1640 }
1641
1642 void Rectangle::interpolateAcross(escript::Data& target, const escript::Data& source) const
1643 {
1644 #ifdef USE_RIPLEY
1645 if (coupler == NULL) {
1646 coupler = new RipleyCoupler(this, m_dx, m_mpiInfo->rank);
1647 }
1648 coupler->interpolate(target, source);
1649 #else
1650 throw SpeckleyException("Speckley::Rectangle interpolation to unsupported domain");
1651 #endif
1652 }
1653
1654 } // end of namespace speckley
1655

  ViewVC Help
Powered by ViewVC 1.1.26