/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6144 - (show annotations)
Wed Apr 6 05:25:13 2016 UTC (2 years, 7 months ago) by caltinay
File size: 100478 byte(s)
last round of namespacing defines.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Rectangle.h>
18 #include <ripley/DefaultAssembler2D.h>
19 #include <ripley/LameAssembler2D.h>
20 #include <ripley/WaveAssembler2D.h>
21 #include <ripley/blocktools.h>
22 #include <ripley/domainhelpers.h>
23
24 #include <escript/FileWriter.h>
25 #include <escript/Random.h>
26
27 #include <paso/SystemMatrix.h>
28
29 #ifdef ESYS_HAVE_NETCDF
30 #include <netcdfcpp.h>
31 #endif
32
33 #ifdef ESYS_HAVE_SILO
34 #include <silo.h>
35 #ifdef ESYS_MPI
36 #include <pmpio.h>
37 #endif
38 #endif
39
40 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
41 #include <boost/scoped_array.hpp>
42
43 #include <iomanip>
44 #include <limits>
45
46 namespace bp = boost::python;
47 namespace bm = boost::math;
48 using escript::AbstractSystemMatrix;
49 using escript::FileWriter;
50 using escript::IOError;
51 using escript::NotImplementedError;
52 using escript::ValueError;
53 using std::vector;
54 using std::string;
55 using std::min;
56 using std::max;
57 using std::copy;
58 using std::ios;
59 using std::fill;
60
61 namespace ripley {
62
63 Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
64 double y1, int d0, int d1,
65 const vector<double>& points,
66 const vector<int>& tags,
67 const TagMap& tagnamestonums,
68 escript::SubWorld_ptr w) :
69 RipleyDomain(2, w)
70 {
71 if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
72 > std::numeric_limits<dim_t>::max())
73 throw RipleyException("The number of elements has overflowed, this "
74 "limit may be raised in future releases.");
75
76 if (n0 <= 0 || n1 <= 0)
77 throw ValueError("Number of elements in each spatial dimension "
78 "must be positive");
79
80 // ignore subdivision parameters for serial run
81 if (m_mpiInfo->size == 1) {
82 d0=1;
83 d1=1;
84 }
85
86 bool warn=false;
87 vector<int> factors;
88 int ranks = m_mpiInfo->size;
89 dim_t epr[2] = {n0,n1};
90 int d[2] = {d0,d1};
91 if (d0<=0 || d1<=0) {
92 for (int i = 0; i < 2; i++) {
93 if (d[i] < 1) {
94 d[i] = 1;
95 continue;
96 }
97 epr[i] = -1; // can no longer be max
98 //remove
99 if (ranks % d[i] != 0) {
100 throw ValueError("Invalid number of spatial subdivisions");
101 }
102 ranks /= d[i];
103 }
104 factorise(factors, ranks);
105 if (factors.size() != 0) {
106 warn = true;
107 }
108 }
109
110 while (factors.size() > 0) {
111 int i = epr[0] > epr[1] ? 0 : 1;
112 int f = factors.back();
113 factors.pop_back();
114 d[i] *= f;
115 epr[i] /= f;
116 }
117 d0 = d[0]; d1 = d[1];
118
119 // ensure number of subdivisions is valid and nodes can be distributed
120 // among number of ranks
121 if (d0*d1 != m_mpiInfo->size)
122 throw ValueError("Invalid number of spatial subdivisions");
123
124 if (warn) {
125 std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
126 << d1 << "). This may not be optimal!" << std::endl;
127 }
128
129 double l0 = x1-x0;
130 double l1 = y1-y0;
131 m_dx[0] = l0/n0;
132 m_dx[1] = l1/n1;
133
134 if ((n0+1)%d0 > 0) {
135 n0=(dim_t)round((float)(n0+1)/d0+0.5)*d0-1;
136 l0=m_dx[0]*n0;
137 std::cout << "Warning: Adjusted number of elements and length. N0="
138 << n0 << ", l0=" << l0 << std::endl;
139 }
140 if ((n1+1)%d1 > 0) {
141 n1=(dim_t)round((float)(n1+1)/d1+0.5)*d1-1;
142 l1=m_dx[1]*n1;
143 std::cout << "Warning: Adjusted number of elements and length. N1="
144 << n1 << ", l1=" << l1 << std::endl;
145 }
146
147 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
148 throw ValueError("Too few elements for the number of ranks");
149
150 m_gNE[0] = n0;
151 m_gNE[1] = n1;
152 m_origin[0] = x0;
153 m_origin[1] = y0;
154 m_length[0] = l0;
155 m_length[1] = l1;
156 m_NX[0] = d0;
157 m_NX[1] = d1;
158
159 // local number of elements (with and without overlap)
160 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
161 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
162 m_NE[0]++;
163 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
164 m_ownNE[0]--;
165
166 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
167 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
168 m_NE[1]++;
169 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
170 m_ownNE[1]--;
171
172 // local number of nodes
173 m_NN[0] = m_NE[0]+1;
174 m_NN[1] = m_NE[1]+1;
175
176 // bottom-left node is at (offset0,offset1) in global mesh
177 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
178 if (m_offset[0] > 0)
179 m_offset[0]--;
180 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
181 if (m_offset[1] > 0)
182 m_offset[1]--;
183
184 populateSampleIds();
185
186 for (TagMap::const_iterator i = tagnamestonums.begin();
187 i != tagnamestonums.end(); i++) {
188 setTagMap(i->first, i->second);
189 }
190 addPoints(points, tags);
191 }
192
193 Rectangle::~Rectangle()
194 {
195 }
196
197 string Rectangle::getDescription() const
198 {
199 return "ripley::Rectangle";
200 }
201
202 bool Rectangle::operator==(const escript::AbstractDomain& other) const
203 {
204 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
205 if (o) {
206 return (RipleyDomain::operator==(other) &&
207 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
208 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
209 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
210 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
211 }
212
213 return false;
214 }
215
216 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
217 const ReaderParameters& params) const
218 {
219 #ifdef ESYS_HAVE_NETCDF
220 // check destination function space
221 dim_t myN0, myN1;
222 if (out.getFunctionSpace().getTypeCode() == Nodes) {
223 myN0 = m_NN[0];
224 myN1 = m_NN[1];
225 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
226 out.getFunctionSpace().getTypeCode() == ReducedElements) {
227 myN0 = m_NE[0];
228 myN1 = m_NE[1];
229 } else
230 throw ValueError("readNcGrid(): invalid function space for output data object");
231
232 if (params.first.size() != 2)
233 throw ValueError("readNcGrid(): argument 'first' must have 2 entries");
234
235 if (params.numValues.size() != 2)
236 throw ValueError("readNcGrid(): argument 'numValues' must have 2 entries");
237
238 if (params.multiplier.size() != 2)
239 throw ValueError("readNcGrid(): argument 'multiplier' must have 2 entries");
240 for (size_t i=0; i<params.multiplier.size(); i++)
241 if (params.multiplier[i]<1)
242 throw ValueError("readNcGrid(): all multipliers must be positive");
243 if (params.reverse.size() != 2)
244 throw ValueError("readNcGrid(): argument 'reverse' must have 2 entries");
245
246 // check file existence and size
247 NcFile f(filename.c_str(), NcFile::ReadOnly);
248 if (!f.is_valid())
249 throw IOError("readNcGrid(): cannot open file");
250
251 NcVar* var = f.get_var(varname.c_str());
252 if (!var)
253 throw IOError("readNcGrid(): invalid variable");
254
255 // TODO: rank>0 data support
256 const int numComp = out.getDataPointSize();
257 if (numComp > 1)
258 throw NotImplementedError("readNcGrid(): only scalar data supported");
259
260 const int dims = var->num_dims();
261 boost::scoped_array<long> edges(var->edges());
262
263 // is this a slice of the data object (dims!=2)?
264 // note the expected ordering of edges (as in numpy: y,x)
265 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
266 || (dims==1 && params.numValues[1]>1) ) {
267 throw IOError("readNcGrid(): not enough data in file");
268 }
269
270 // check if this rank contributes anything
271 if (params.first[0] >= m_offset[0]+myN0 ||
272 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
273 params.first[1] >= m_offset[1]+myN1 ||
274 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
275 return;
276
277 // now determine how much this rank has to write
278
279 // first coordinates in data object to write to
280 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
281 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
282 // indices to first value in file (not accounting for reverse yet)
283 dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
284 dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
285 // number of values to read
286 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
287 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
288
289 // make sure we read the right block if going backwards through file
290 if (params.reverse[0])
291 idx0 = edges[dims-1]-num0-idx0;
292 if (dims>1 && params.reverse[1])
293 idx1 = edges[dims-2]-num1-idx1;
294
295 vector<double> values(num0*num1);
296 if (dims==2) {
297 var->set_cur(idx1, idx0);
298 var->get(&values[0], num1, num0);
299 } else {
300 var->set_cur(idx0);
301 var->get(&values[0], num0);
302 }
303
304 const int dpp = out.getNumDataPointsPerSample();
305 out.requireWrite();
306
307 // helpers for reversing
308 const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
309 const int x_mult = (params.reverse[0] ? -1 : 1);
310 const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
311 const int y_mult = (params.reverse[1] ? -1 : 1);
312
313 for (index_t y=0; y<num1; y++) {
314 #pragma omp parallel for
315 for (index_t x=0; x<num0; x++) {
316 const dim_t baseIndex = first0+x*params.multiplier[0]
317 +(first1+y*params.multiplier[1])*myN0;
318 const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
319 if (!bm::isnan(values[srcIndex])) {
320 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
321 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
322 const dim_t dataIndex = baseIndex+m0+m1*myN0;
323 double* dest = out.getSampleDataRW(dataIndex);
324 for (index_t q=0; q<dpp; q++) {
325 *dest++ = values[srcIndex];
326 }
327 }
328 }
329 }
330 }
331 }
332 #else
333 throw RipleyException("readNcGrid(): not compiled with netCDF support");
334 #endif
335 }
336
337 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
338 const ReaderParameters& params) const
339 {
340 // the mapping is not universally correct but should work on our
341 // supported platforms
342 switch (params.dataType) {
343 case DATATYPE_INT32:
344 readBinaryGridImpl<int>(out, filename, params);
345 break;
346 case DATATYPE_FLOAT32:
347 readBinaryGridImpl<float>(out, filename, params);
348 break;
349 case DATATYPE_FLOAT64:
350 readBinaryGridImpl<double>(out, filename, params);
351 break;
352 default:
353 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
354 }
355 }
356
357 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
358 const ReaderParameters& params) const
359 {
360 #ifdef ESYS_HAVE_BOOST_IO
361 // the mapping is not universally correct but should work on our
362 // supported platforms
363 switch (params.dataType) {
364 case DATATYPE_INT32:
365 readBinaryGridZippedImpl<int>(out, filename, params);
366 break;
367 case DATATYPE_FLOAT32:
368 readBinaryGridZippedImpl<float>(out, filename, params);
369 break;
370 case DATATYPE_FLOAT64:
371 readBinaryGridZippedImpl<double>(out, filename, params);
372 break;
373 default:
374 throw ValueError("readBinaryGridFromZipped(): invalid or unsupported datatype");
375 }
376 #else
377 throw ValueError("readBinaryGridFromZipped(): not compiled with zip support");
378 #endif
379 }
380
381 template<typename ValueType>
382 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
383 const ReaderParameters& params) const
384 {
385 // check destination function space
386 dim_t myN0, myN1;
387 if (out.getFunctionSpace().getTypeCode() == Nodes) {
388 myN0 = m_NN[0];
389 myN1 = m_NN[1];
390 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
391 out.getFunctionSpace().getTypeCode() == ReducedElements) {
392 myN0 = m_NE[0];
393 myN1 = m_NE[1];
394 } else
395 throw ValueError("readBinaryGrid(): invalid function space for output data object");
396
397 if (params.first.size() != 2)
398 throw ValueError("readBinaryGrid(): argument 'first' must have 2 entries");
399
400 if (params.numValues.size() != 2)
401 throw ValueError("readBinaryGrid(): argument 'numValues' must have 2 entries");
402
403 if (params.multiplier.size() != 2)
404 throw ValueError("readBinaryGrid(): argument 'multiplier' must have 2 entries");
405 for (size_t i=0; i<params.multiplier.size(); i++)
406 if (params.multiplier[i]<1)
407 throw ValueError("readBinaryGrid(): all multipliers must be positive");
408 if (params.reverse[0] != 0 || params.reverse[1] != 0)
409 throw NotImplementedError("readBinaryGrid(): reversing not supported yet");
410
411 // check file existence and size
412 std::ifstream f(filename.c_str(), std::ifstream::binary);
413 if (f.fail()) {
414 throw IOError("readBinaryGrid(): cannot open file");
415 }
416 f.seekg(0, ios::end);
417 const int numComp = out.getDataPointSize();
418 const dim_t filesize = f.tellg();
419 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
420 if (filesize < reqsize) {
421 f.close();
422 throw IOError("readBinaryGrid(): not enough data in file");
423 }
424
425 // check if this rank contributes anything
426 if (params.first[0] >= m_offset[0]+myN0 ||
427 params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
428 params.first[1] >= m_offset[1]+myN1 ||
429 params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
430 f.close();
431 return;
432 }
433
434 // now determine how much this rank has to write
435
436 // first coordinates in data object to write to
437 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
438 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
439 // indices to first value in file
440 const dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
441 const dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
442 // if restX > 0 the first value in the respective dimension has been
443 // written restX times already in a previous rank so this rank only
444 // contributes (multiplier-rank) copies of that value
445 const dim_t rest0 = m_offset[0]%params.multiplier[0];
446 const dim_t rest1 = m_offset[1]%params.multiplier[1];
447 // number of values to read
448 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
449 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
450
451 out.requireWrite();
452 vector<ValueType> values(num0*numComp);
453 const int dpp = out.getNumDataPointsPerSample();
454
455 for (dim_t y=0; y<num1; y++) {
456 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
457 f.seekg(fileofs*sizeof(ValueType));
458 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
459 const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
460 dim_t dataYbase = first1+y*params.multiplier[1];
461 if (y>0)
462 dataYbase -= rest1;
463 for (dim_t x=0; x<num0; x++) {
464 const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
465 dim_t dataXbase = first0+x*params.multiplier[0];
466 if (x>0)
467 dataXbase -= rest0;
468 // write a block of mult0 x mult1 identical values into Data object
469 for (dim_t m1=0; m1<m1limit; m1++) {
470 const dim_t dataY = dataYbase+m1;
471 if (dataY >= myN1)
472 break;
473 for (dim_t m0=0; m0<m0limit; m0++) {
474 const dim_t dataX = dataXbase+m0;
475 if (dataX >= myN0)
476 break;
477 const dim_t dataIndex = dataX+dataY*myN0;
478 double* dest = out.getSampleDataRW(dataIndex);
479 for (int c=0; c<numComp; c++) {
480 ValueType val = values[x*numComp+c];
481
482 if (params.byteOrder != BYTEORDER_NATIVE) {
483 char* cval = reinterpret_cast<char*>(&val);
484 // this will alter val!!
485 if (sizeof(ValueType) > 4) {
486 byte_swap64(cval);
487 } else {
488 byte_swap32(cval);
489 }
490 }
491 if (!bm::isnan(val)) {
492 for (int q=0; q<dpp; q++) {
493 *dest++ = static_cast<double>(val);
494 }
495 }
496 }
497 }
498 }
499 }
500 }
501
502 f.close();
503 }
504
505 #ifdef ESYS_HAVE_BOOST_IO
506 template<typename ValueType>
507 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
508 const ReaderParameters& params) const
509 {
510 // check destination function space
511 dim_t myN0, myN1;
512 if (out.getFunctionSpace().getTypeCode() == Nodes) {
513 myN0 = m_NN[0];
514 myN1 = m_NN[1];
515 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
516 out.getFunctionSpace().getTypeCode() == ReducedElements) {
517 myN0 = m_NE[0];
518 myN1 = m_NE[1];
519 } else
520 throw ValueError("readBinaryGrid(): invalid function space for output data object");
521
522 // check file existence and size
523 std::ifstream f(filename.c_str(), std::ifstream::binary);
524 if (f.fail()) {
525 throw IOError("readBinaryGridFromZipped(): cannot open file");
526 }
527 f.seekg(0, ios::end);
528 const int numComp = out.getDataPointSize();
529 dim_t filesize = f.tellg();
530 f.seekg(0, ios::beg);
531 vector<char> compressed(filesize);
532 f.read((char*)&compressed[0], filesize);
533 f.close();
534 vector<char> decompressed = unzip(compressed);
535 filesize = decompressed.size();
536 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
537 if (filesize < reqsize) {
538 throw IOError("readBinaryGridFromZipped(): not enough data in file");
539 }
540
541 // check if this rank contributes anything
542 if (params.first[0] >= m_offset[0]+myN0 ||
543 params.first[0]+params.numValues[0] <= m_offset[0] ||
544 params.first[1] >= m_offset[1]+myN1 ||
545 params.first[1]+params.numValues[1] <= m_offset[1]) {
546 f.close();
547 return;
548 }
549
550 // now determine how much this rank has to write
551
552 // first coordinates in data object to write to
553 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
554 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
555 // indices to first value in file
556 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
557 const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
558 // number of values to read
559 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
560 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
561
562 out.requireWrite();
563 vector<ValueType> values(num0*numComp);
564 const int dpp = out.getNumDataPointsPerSample();
565
566 for (dim_t y=0; y<num1; y++) {
567 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
568 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
569 for (dim_t x=0; x<num0; x++) {
570 const dim_t baseIndex = first0+x*params.multiplier[0]
571 +(first1+y*params.multiplier[1])*myN0;
572 for (int m1=0; m1<params.multiplier[1]; m1++) {
573 for (int m0=0; m0<params.multiplier[0]; m0++) {
574 const dim_t dataIndex = baseIndex+m0+m1*myN0;
575 double* dest = out.getSampleDataRW(dataIndex);
576 for (int c=0; c<numComp; c++) {
577 ValueType val = values[x*numComp+c];
578
579 if (params.byteOrder != BYTEORDER_NATIVE) {
580 char* cval = reinterpret_cast<char*>(&val);
581 // this will alter val!!
582 byte_swap32(cval);
583 }
584 if (!bm::isnan(val)) {
585 for (int q=0; q<dpp; q++) {
586 *dest++ = static_cast<double>(val);
587 }
588 }
589 }
590 }
591 }
592 }
593 }
594
595 f.close();
596 }
597 #endif
598
599 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
600 int byteOrder, int dataType) const
601 {
602 // the mapping is not universally correct but should work on our
603 // supported platforms
604 switch (dataType) {
605 case DATATYPE_INT32:
606 writeBinaryGridImpl<int>(in, filename, byteOrder);
607 break;
608 case DATATYPE_FLOAT32:
609 writeBinaryGridImpl<float>(in, filename, byteOrder);
610 break;
611 case DATATYPE_FLOAT64:
612 writeBinaryGridImpl<double>(in, filename, byteOrder);
613 break;
614 default:
615 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
616 }
617 }
618
619 template<typename ValueType>
620 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
621 const string& filename, int byteOrder) const
622 {
623 // check function space and determine number of points
624 dim_t myN0, myN1;
625 dim_t totalN0, totalN1;
626 dim_t offset0, offset1;
627 if (in.getFunctionSpace().getTypeCode() == Nodes) {
628 myN0 = m_NN[0];
629 myN1 = m_NN[1];
630 totalN0 = m_gNE[0]+1;
631 totalN1 = m_gNE[1]+1;
632 offset0 = m_offset[0];
633 offset1 = m_offset[1];
634 } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
635 in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
636 myN0 = (m_gNE[0]+1)/m_NX[0];
637 myN1 = (m_gNE[1]+1)/m_NX[1];
638 totalN0 = m_gNE[0]+1;
639 totalN1 = m_gNE[1]+1;
640 offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
641 offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
642 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
643 in.getFunctionSpace().getTypeCode() == ReducedElements) {
644 myN0 = m_NE[0];
645 myN1 = m_NE[1];
646 totalN0 = m_gNE[0];
647 totalN1 = m_gNE[1];
648 offset0 = m_offset[0];
649 offset1 = m_offset[1];
650 } else
651 throw ValueError("writeBinaryGrid(): unsupported function space");
652
653 const int numComp = in.getDataPointSize();
654 const int dpp = in.getNumDataPointsPerSample();
655
656 if (numComp > 1 || dpp > 1)
657 throw NotImplementedError("writeBinaryGrid(): only scalar, single-value data supported");
658
659 const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
660
661 // from here on we know that each sample consists of one value
662 FileWriter fw(m_mpiInfo->comm);
663 fw.openFile(filename, fileSize, true);
664 MPIBarrier();
665
666 for (index_t y=0; y<myN1; y++) {
667 const dim_t fileofs = (offset0+(offset1+y)*totalN0)*sizeof(ValueType);
668 std::ostringstream oss;
669
670 for (index_t x=0; x<myN0; x++) {
671 const double* sample = in.getSampleDataRO(y*myN0+x);
672 ValueType fvalue = static_cast<ValueType>(*sample);
673 if (byteOrder == BYTEORDER_NATIVE) {
674 oss.write((char*)&fvalue, sizeof(fvalue));
675 } else {
676 char* value = reinterpret_cast<char*>(&fvalue);
677 if (sizeof(fvalue)>4) {
678 byte_swap64(value);
679 } else {
680 byte_swap32(value);
681 }
682 oss.write(value, sizeof(fvalue));
683 }
684 }
685 fw.writeAt(oss, fileofs);
686 }
687 fw.close();
688 }
689
690 void Rectangle::write(const std::string& filename) const
691 {
692 throw NotImplementedError("write: not supported");
693 }
694
695 void Rectangle::dump(const string& fileName) const
696 {
697 #ifdef ESYS_HAVE_SILO
698 string fn(fileName);
699 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
700 fn+=".silo";
701 }
702
703 int driver=DB_HDF5;
704 DBfile* dbfile = NULL;
705 const char* blockDirFmt = "/block%04d";
706
707 #ifdef ESYS_MPI
708 PMPIO_baton_t* baton = NULL;
709 const int NUM_SILO_FILES = 1;
710 #endif
711
712 if (m_mpiInfo->size > 1) {
713 #ifdef ESYS_MPI
714 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
715 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
716 PMPIO_DefaultClose, (void*)&driver);
717 // try the fallback driver in case of error
718 if (!baton && driver != DB_PDB) {
719 driver = DB_PDB;
720 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
721 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
722 PMPIO_DefaultClose, (void*)&driver);
723 }
724 if (baton) {
725 char siloPath[64];
726 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
727 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
728 }
729 #endif
730 } else {
731 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
732 getDescription().c_str(), driver);
733 // try the fallback driver in case of error
734 if (!dbfile && driver != DB_PDB) {
735 driver = DB_PDB;
736 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
737 getDescription().c_str(), driver);
738 }
739 char siloPath[64];
740 snprintf(siloPath, 64, blockDirFmt, 0);
741 DBMkDir(dbfile, siloPath);
742 DBSetDir(dbfile, siloPath);
743 }
744
745 if (!dbfile)
746 throw IOError("dump: Could not create Silo file");
747
748 /*
749 if (driver==DB_HDF5) {
750 // gzip level 1 already provides good compression with minimal
751 // performance penalty. Some tests showed that gzip levels >3 performed
752 // rather badly on escript data both in terms of time and space
753 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
754 }
755 */
756
757 const dim_t NN0 = m_NN[0];
758 const dim_t NN1 = m_NN[1];
759 boost::scoped_ptr<double> x(new double[NN0]);
760 boost::scoped_ptr<double> y(new double[NN1]);
761 double* coords[2] = { x.get(), y.get() };
762 #pragma omp parallel
763 {
764 #pragma omp for nowait
765 for (dim_t i0 = 0; i0 < NN0; i0++) {
766 coords[0][i0]=getLocalCoordinate(i0, 0);
767 }
768 #pragma omp for nowait
769 for (dim_t i1 = 0; i1 < NN1; i1++) {
770 coords[1][i1]=getLocalCoordinate(i1, 1);
771 }
772 }
773
774 // casting to int!!
775 vector<int> dims(m_NN, m_NN+2);
776
777 // write mesh
778 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
779 DB_COLLINEAR, NULL);
780
781 // write node ids
782 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
783 NULL, 0, DB_INT, DB_NODECENT, NULL);
784
785 // write element ids
786 dims.assign(m_NE, m_NE+2);
787 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
788 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
789
790 // rank 0 writes multimesh and multivar
791 if (m_mpiInfo->rank == 0) {
792 vector<string> tempstrings;
793 vector<char*> names;
794 for (dim_t i=0; i<m_mpiInfo->size; i++) {
795 std::stringstream path;
796 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
797 tempstrings.push_back(path.str());
798 names.push_back((char*)tempstrings.back().c_str());
799 }
800 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
801 DBSetDir(dbfile, "/");
802 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
803 &types[0], NULL);
804 tempstrings.clear();
805 names.clear();
806 for (dim_t i=0; i<m_mpiInfo->size; i++) {
807 std::stringstream path;
808 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
809 tempstrings.push_back(path.str());
810 names.push_back((char*)tempstrings.back().c_str());
811 }
812 types.assign(m_mpiInfo->size, DB_QUADVAR);
813 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
814 &types[0], NULL);
815 tempstrings.clear();
816 names.clear();
817 for (dim_t i=0; i<m_mpiInfo->size; i++) {
818 std::stringstream path;
819 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
820 tempstrings.push_back(path.str());
821 names.push_back((char*)tempstrings.back().c_str());
822 }
823 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
824 &types[0], NULL);
825 }
826
827 if (m_mpiInfo->size > 1) {
828 #ifdef ESYS_MPI
829 PMPIO_HandOffBaton(baton, dbfile);
830 PMPIO_Finish(baton);
831 #endif
832 } else {
833 DBClose(dbfile);
834 }
835
836 #else // ESYS_HAVE_SILO
837 throw RipleyException("dump: no Silo support");
838 #endif
839 }
840
841 const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
842 {
843 switch (fsType) {
844 case Nodes:
845 case ReducedNodes: // FIXME: reduced
846 return &m_nodeId[0];
847 case DegreesOfFreedom:
848 case ReducedDegreesOfFreedom: // FIXME: reduced
849 return &m_dofId[0];
850 case Elements:
851 case ReducedElements:
852 return &m_elementId[0];
853 case FaceElements:
854 case ReducedFaceElements:
855 return &m_faceId[0];
856 case Points:
857 return &m_diracPointNodeIDs[0];
858 default:
859 break;
860 }
861
862 std::stringstream msg;
863 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
864 throw ValueError(msg.str());
865 }
866
867 bool Rectangle::ownSample(int fsType, index_t id) const
868 {
869 if (getMPISize()==1)
870 return true;
871
872 switch (fsType) {
873 case Nodes:
874 case ReducedNodes: // FIXME: reduced
875 return (m_dofMap[id] < getNumDOF());
876 case DegreesOfFreedom:
877 case ReducedDegreesOfFreedom:
878 return true;
879 case Elements:
880 case ReducedElements:
881 // check ownership of element's bottom left node
882 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
883 case FaceElements:
884 case ReducedFaceElements:
885 {
886 // determine which face the sample belongs to before
887 // checking ownership of corresponding element's first node
888 dim_t n=0;
889 for (size_t i=0; i<4; i++) {
890 n+=m_faceCount[i];
891 if (id<n) {
892 index_t k;
893 if (i==1)
894 k=m_NN[0]-2;
895 else if (i==3)
896 k=m_NN[0]*(m_NN[1]-2);
897 else
898 k=0;
899 // determine whether to move right or up
900 const index_t delta=(i/2==0 ? m_NN[0] : 1);
901 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
902 }
903 }
904 return false;
905 }
906 default:
907 break;
908 }
909
910 std::stringstream msg;
911 msg << "ownSample: invalid function space type " << fsType;
912 throw ValueError(msg.str());
913 }
914
915 RankVector Rectangle::getOwnerVector(int fsType) const
916 {
917 RankVector owner;
918 const int rank = m_mpiInfo->rank;
919
920 if (fsType == Elements || fsType == ReducedElements) {
921 owner.assign(getNumElements(), rank);
922 if (m_faceCount[0] == 0) {
923 owner[0]=(m_faceCount[2]==0 ? rank-m_NX[0]-1 : rank-1);
924 for (dim_t i=1; i<m_NE[1]; i++)
925 owner[i*m_NE[0]] = rank-1;
926 }
927 if (m_faceCount[2]==0) {
928 const int first=(m_faceCount[0]==0 ? 1 : 0);
929 for (dim_t i=first; i<m_NE[0]; i++)
930 owner[i] = rank-m_NX[0];
931 }
932
933 } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
934 owner.assign(getNumFaceElements(), rank);
935 if (m_faceCount[0] == 0) {
936 if (m_faceCount[2] > 0)
937 owner[m_faceCount[1]] = rank-1;
938 if (m_faceCount[3] > 0)
939 owner[m_faceCount[1]+m_faceCount[2]] = rank-1;
940 }
941 if (m_faceCount[2] == 0) {
942 if (m_faceCount[0] > 0)
943 owner[0] = rank-m_NX[0];
944 if (m_faceCount[1] > 0)
945 owner[m_faceCount[0]] = rank-m_NX[0];
946 }
947
948 } else {
949 throw ValueError("getOwnerVector: only valid for element types");
950 }
951
952 return owner;
953 }
954
955 void Rectangle::setToNormal(escript::Data& out) const
956 {
957 const dim_t NE0=m_NE[0];
958 const dim_t NE1=m_NE[1];
959 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
960 out.requireWrite();
961 #pragma omp parallel
962 {
963 if (m_faceOffset[0] > -1) {
964 #pragma omp for nowait
965 for (index_t k1 = 0; k1 < NE1; ++k1) {
966 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
967 // set vector at two quadrature points
968 *o++ = -1.;
969 *o++ = 0.;
970 *o++ = -1.;
971 *o = 0.;
972 }
973 }
974
975 if (m_faceOffset[1] > -1) {
976 #pragma omp for nowait
977 for (index_t k1 = 0; k1 < NE1; ++k1) {
978 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
979 // set vector at two quadrature points
980 *o++ = 1.;
981 *o++ = 0.;
982 *o++ = 1.;
983 *o = 0.;
984 }
985 }
986
987 if (m_faceOffset[2] > -1) {
988 #pragma omp for nowait
989 for (index_t k0 = 0; k0 < NE0; ++k0) {
990 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
991 // set vector at two quadrature points
992 *o++ = 0.;
993 *o++ = -1.;
994 *o++ = 0.;
995 *o = -1.;
996 }
997 }
998
999 if (m_faceOffset[3] > -1) {
1000 #pragma omp for nowait
1001 for (index_t k0 = 0; k0 < NE0; ++k0) {
1002 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1003 // set vector at two quadrature points
1004 *o++ = 0.;
1005 *o++ = 1.;
1006 *o++ = 0.;
1007 *o = 1.;
1008 }
1009 }
1010 } // end of parallel section
1011 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1012 out.requireWrite();
1013 #pragma omp parallel
1014 {
1015 if (m_faceOffset[0] > -1) {
1016 #pragma omp for nowait
1017 for (index_t k1 = 0; k1 < NE1; ++k1) {
1018 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1019 *o++ = -1.;
1020 *o = 0.;
1021 }
1022 }
1023
1024 if (m_faceOffset[1] > -1) {
1025 #pragma omp for nowait
1026 for (index_t k1 = 0; k1 < NE1; ++k1) {
1027 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1028 *o++ = 1.;
1029 *o = 0.;
1030 }
1031 }
1032
1033 if (m_faceOffset[2] > -1) {
1034 #pragma omp for nowait
1035 for (index_t k0 = 0; k0 < NE0; ++k0) {
1036 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1037 *o++ = 0.;
1038 *o = -1.;
1039 }
1040 }
1041
1042 if (m_faceOffset[3] > -1) {
1043 #pragma omp for nowait
1044 for (index_t k0 = 0; k0 < NE0; ++k0) {
1045 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1046 *o++ = 0.;
1047 *o = 1.;
1048 }
1049 }
1050 } // end of parallel section
1051
1052 } else {
1053 std::stringstream msg;
1054 msg << "setToNormal: invalid function space type "
1055 << out.getFunctionSpace().getTypeCode();
1056 throw ValueError(msg.str());
1057 }
1058 }
1059
1060 void Rectangle::setToSize(escript::Data& out) const
1061 {
1062 if (out.getFunctionSpace().getTypeCode() == Elements
1063 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1064 out.requireWrite();
1065 const dim_t numQuad = out.getNumDataPointsPerSample();
1066 const dim_t numElements = getNumElements();
1067 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
1068 #pragma omp parallel for
1069 for (index_t k = 0; k < numElements; ++k) {
1070 double* o = out.getSampleDataRW(k);
1071 fill(o, o+numQuad, size);
1072 }
1073 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1074 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1075 out.requireWrite();
1076 const dim_t numQuad=out.getNumDataPointsPerSample();
1077 const dim_t NE0 = m_NE[0];
1078 const dim_t NE1 = m_NE[1];
1079 #pragma omp parallel
1080 {
1081 if (m_faceOffset[0] > -1) {
1082 #pragma omp for nowait
1083 for (index_t k1 = 0; k1 < NE1; ++k1) {
1084 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1085 fill(o, o+numQuad, m_dx[1]);
1086 }
1087 }
1088
1089 if (m_faceOffset[1] > -1) {
1090 #pragma omp for nowait
1091 for (index_t k1 = 0; k1 < NE1; ++k1) {
1092 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1093 fill(o, o+numQuad, m_dx[1]);
1094 }
1095 }
1096
1097 if (m_faceOffset[2] > -1) {
1098 #pragma omp for nowait
1099 for (index_t k0 = 0; k0 < NE0; ++k0) {
1100 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1101 fill(o, o+numQuad, m_dx[0]);
1102 }
1103 }
1104
1105 if (m_faceOffset[3] > -1) {
1106 #pragma omp for nowait
1107 for (index_t k0 = 0; k0 < NE0; ++k0) {
1108 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1109 fill(o, o+numQuad, m_dx[0]);
1110 }
1111 }
1112 } // end of parallel section
1113
1114 } else {
1115 std::stringstream msg;
1116 msg << "setToSize: invalid function space type "
1117 << out.getFunctionSpace().getTypeCode();
1118 throw ValueError(msg.str());
1119 }
1120 }
1121
1122 void Rectangle::Print_Mesh_Info(const bool full) const
1123 {
1124 RipleyDomain::Print_Mesh_Info(full);
1125 if (full) {
1126 std::cout << " Id Coordinates" << std::endl;
1127 std::cout.precision(15);
1128 std::cout.setf(ios::scientific, ios::floatfield);
1129 for (index_t i=0; i < getNumNodes(); i++) {
1130 std::cout << " " << std::setw(5) << m_nodeId[i]
1131 << " " << getLocalCoordinate(i%m_NN[0], 0)
1132 << " " << getLocalCoordinate(i/m_NN[0], 1) << std::endl;
1133 }
1134 }
1135 }
1136
1137
1138 //protected
1139 void Rectangle::assembleCoordinates(escript::Data& arg) const
1140 {
1141 int numDim = m_numDim;
1142 if (!arg.isDataPointShapeEqual(1, &numDim))
1143 throw ValueError("setToX: Invalid Data object shape");
1144 if (!arg.numSamplesEqual(1, getNumNodes()))
1145 throw ValueError("setToX: Illegal number of samples in Data object");
1146
1147 const dim_t NN0 = m_NN[0];
1148 const dim_t NN1 = m_NN[1];
1149 arg.requireWrite();
1150 #pragma omp parallel for
1151 for (dim_t i1 = 0; i1 < NN1; i1++) {
1152 for (dim_t i0 = 0; i0 < NN0; i0++) {
1153 double* point = arg.getSampleDataRW(i0+NN0*i1);
1154 point[0] = getLocalCoordinate(i0, 0);
1155 point[1] = getLocalCoordinate(i1, 1);
1156 }
1157 }
1158 }
1159
1160 //protected
1161 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
1162 {
1163 const dim_t numComp = in.getDataPointSize();
1164 const dim_t NE0 = m_NE[0];
1165 const dim_t NE1 = m_NE[1];
1166 const double cx0 = .21132486540518711775/m_dx[0];
1167 const double cx1 = .78867513459481288225/m_dx[0];
1168 const double cx2 = 1./m_dx[0];
1169 const double cy0 = .21132486540518711775/m_dx[1];
1170 const double cy1 = .78867513459481288225/m_dx[1];
1171 const double cy2 = 1./m_dx[1];
1172
1173 if (out.getFunctionSpace().getTypeCode() == Elements) {
1174 out.requireWrite();
1175 #pragma omp parallel
1176 {
1177 vector<double> f_00(numComp);
1178 vector<double> f_01(numComp);
1179 vector<double> f_10(numComp);
1180 vector<double> f_11(numComp);
1181 #pragma omp for
1182 for (index_t k1=0; k1 < NE1; ++k1) {
1183 for (index_t k0=0; k0 < NE0; ++k0) {
1184 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1185 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1186 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1187 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1188 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1189 for (index_t i=0; i < numComp; ++i) {
1190 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1191 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1192 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1193 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1194 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1195 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1196 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1197 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1198 } // end of component loop i
1199 } // end of k0 loop
1200 } // end of k1 loop
1201 } // end of parallel section
1202 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1203 out.requireWrite();
1204 #pragma omp parallel
1205 {
1206 vector<double> f_00(numComp);
1207 vector<double> f_01(numComp);
1208 vector<double> f_10(numComp);
1209 vector<double> f_11(numComp);
1210 #pragma omp for
1211 for (index_t k1=0; k1 < NE1; ++k1) {
1212 for (index_t k0=0; k0 < NE0; ++k0) {
1213 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1214 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1215 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1216 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1217 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1218 for (index_t i=0; i < numComp; ++i) {
1219 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1220 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1221 } // end of component loop i
1222 } // end of k0 loop
1223 } // end of k1 loop
1224 } // end of parallel section
1225 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1226 out.requireWrite();
1227 #pragma omp parallel
1228 {
1229 vector<double> f_00(numComp);
1230 vector<double> f_01(numComp);
1231 vector<double> f_10(numComp);
1232 vector<double> f_11(numComp);
1233 if (m_faceOffset[0] > -1) {
1234 #pragma omp for nowait
1235 for (index_t k1=0; k1 < NE1; ++k1) {
1236 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1237 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1238 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1239 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1240 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1241 for (index_t i=0; i < numComp; ++i) {
1242 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1243 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1244 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1245 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1246 } // end of component loop i
1247 } // end of k1 loop
1248 } // end of face 0
1249 if (m_faceOffset[1] > -1) {
1250 #pragma omp for nowait
1251 for (index_t k1=0; k1 < NE1; ++k1) {
1252 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1253 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1254 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1255 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1256 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1257 for (index_t i=0; i < numComp; ++i) {
1258 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1259 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1260 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1261 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1262 } // end of component loop i
1263 } // end of k1 loop
1264 } // end of face 1
1265 if (m_faceOffset[2] > -1) {
1266 #pragma omp for nowait
1267 for (index_t k0=0; k0 < NE0; ++k0) {
1268 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1269 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1270 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1271 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1272 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1273 for (index_t i=0; i < numComp; ++i) {
1274 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1275 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1276 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1277 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1278 } // end of component loop i
1279 } // end of k0 loop
1280 } // end of face 2
1281 if (m_faceOffset[3] > -1) {
1282 #pragma omp for nowait
1283 for (index_t k0=0; k0 < NE0; ++k0) {
1284 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1285 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1286 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1287 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1288 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1289 for (index_t i=0; i < numComp; ++i) {
1290 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1291 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1292 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1293 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1294 } // end of component loop i
1295 } // end of k0 loop
1296 } // end of face 3
1297 } // end of parallel section
1298
1299 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1300 out.requireWrite();
1301 #pragma omp parallel
1302 {
1303 vector<double> f_00(numComp);
1304 vector<double> f_01(numComp);
1305 vector<double> f_10(numComp);
1306 vector<double> f_11(numComp);
1307 if (m_faceOffset[0] > -1) {
1308 #pragma omp for nowait
1309 for (index_t k1=0; k1 < NE1; ++k1) {
1310 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1311 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1312 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1313 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1314 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1315 for (index_t i=0; i < numComp; ++i) {
1316 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1317 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1318 } // end of component loop i
1319 } // end of k1 loop
1320 } // end of face 0
1321 if (m_faceOffset[1] > -1) {
1322 #pragma omp for nowait
1323 for (index_t k1=0; k1 < NE1; ++k1) {
1324 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1325 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1326 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1327 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1328 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1329 for (index_t i=0; i < numComp; ++i) {
1330 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1331 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1332 } // end of component loop i
1333 } // end of k1 loop
1334 } // end of face 1
1335 if (m_faceOffset[2] > -1) {
1336 #pragma omp for nowait
1337 for (index_t k0=0; k0 < NE0; ++k0) {
1338 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1339 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1340 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1341 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1342 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1343 for (index_t i=0; i < numComp; ++i) {
1344 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1345 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1346 } // end of component loop i
1347 } // end of k0 loop
1348 } // end of face 2
1349 if (m_faceOffset[3] > -1) {
1350 #pragma omp for nowait
1351 for (index_t k0=0; k0 < NE0; ++k0) {
1352 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1353 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1354 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1355 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1356 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1357 for (index_t i=0; i < numComp; ++i) {
1358 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1359 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1360 } // end of component loop i
1361 } // end of k0 loop
1362 } // end of face 3
1363 } // end of parallel section
1364 }
1365 }
1366
1367 //protected
1368 void Rectangle::assembleIntegrate(vector<double>& integrals,
1369 const escript::Data& arg) const
1370 {
1371 const dim_t numComp = arg.getDataPointSize();
1372 const index_t left = getFirstInDim(0);
1373 const index_t bottom = getFirstInDim(1);
1374 const int fs=arg.getFunctionSpace().getTypeCode();
1375 if (fs == Elements && arg.actsExpanded()) {
1376 #pragma omp parallel
1377 {
1378 vector<double> int_local(numComp, 0);
1379 const double w = m_dx[0]*m_dx[1]/4.;
1380 #pragma omp for nowait
1381 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1382 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1383 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1384 for (index_t i=0; i < numComp; ++i) {
1385 const double f0 = f[INDEX2(i,0,numComp)];
1386 const double f1 = f[INDEX2(i,1,numComp)];
1387 const double f2 = f[INDEX2(i,2,numComp)];
1388 const double f3 = f[INDEX2(i,3,numComp)];
1389 int_local[i]+=(f0+f1+f2+f3)*w;
1390 } // end of component loop i
1391 } // end of k0 loop
1392 } // end of k1 loop
1393 #pragma omp critical
1394 for (index_t i=0; i<numComp; i++)
1395 integrals[i]+=int_local[i];
1396 } // end of parallel section
1397
1398 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1399 const double w = m_dx[0]*m_dx[1];
1400 #pragma omp parallel
1401 {
1402 vector<double> int_local(numComp, 0);
1403 #pragma omp for nowait
1404 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1405 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1406 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1407 for (index_t i=0; i < numComp; ++i) {
1408 int_local[i]+=f[i]*w;
1409 }
1410 }
1411 }
1412 #pragma omp critical
1413 for (index_t i=0; i<numComp; i++)
1414 integrals[i]+=int_local[i];
1415 } // end of parallel section
1416
1417 } else if (fs == FaceElements && arg.actsExpanded()) {
1418 #pragma omp parallel
1419 {
1420 vector<double> int_local(numComp, 0);
1421 const double w0 = m_dx[0]/2.;
1422 const double w1 = m_dx[1]/2.;
1423 if (m_faceOffset[0] > -1) {
1424 #pragma omp for nowait
1425 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1426 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1427 for (index_t i=0; i < numComp; ++i) {
1428 const double f0 = f[INDEX2(i,0,numComp)];
1429 const double f1 = f[INDEX2(i,1,numComp)];
1430 int_local[i]+=(f0+f1)*w1;
1431 } // end of component loop i
1432 } // end of k1 loop
1433 }
1434
1435 if (m_faceOffset[1] > -1) {
1436 #pragma omp for nowait
1437 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1438 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1439 for (index_t i=0; i < numComp; ++i) {
1440 const double f0 = f[INDEX2(i,0,numComp)];
1441 const double f1 = f[INDEX2(i,1,numComp)];
1442 int_local[i]+=(f0+f1)*w1;
1443 } // end of component loop i
1444 } // end of k1 loop
1445 }
1446
1447 if (m_faceOffset[2] > -1) {
1448 #pragma omp for nowait
1449 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1450 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1451 for (index_t i=0; i < numComp; ++i) {
1452 const double f0 = f[INDEX2(i,0,numComp)];
1453 const double f1 = f[INDEX2(i,1,numComp)];
1454 int_local[i]+=(f0+f1)*w0;
1455 } // end of component loop i
1456 } // end of k0 loop
1457 }
1458
1459 if (m_faceOffset[3] > -1) {
1460 #pragma omp for nowait
1461 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1462 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1463 for (index_t i=0; i < numComp; ++i) {
1464 const double f0 = f[INDEX2(i,0,numComp)];
1465 const double f1 = f[INDEX2(i,1,numComp)];
1466 int_local[i]+=(f0+f1)*w0;
1467 } // end of component loop i
1468 } // end of k0 loop
1469 }
1470 #pragma omp critical
1471 for (index_t i=0; i<numComp; i++)
1472 integrals[i]+=int_local[i];
1473 } // end of parallel section
1474
1475 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1476 #pragma omp parallel
1477 {
1478 vector<double> int_local(numComp, 0);
1479 if (m_faceOffset[0] > -1) {
1480 #pragma omp for nowait
1481 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1482 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1483 for (index_t i=0; i < numComp; ++i) {
1484 int_local[i]+=f[i]*m_dx[1];
1485 }
1486 }
1487 }
1488
1489 if (m_faceOffset[1] > -1) {
1490 #pragma omp for nowait
1491 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1492 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1493 for (index_t i=0; i < numComp; ++i) {
1494 int_local[i]+=f[i]*m_dx[1];
1495 }
1496 }
1497 }
1498
1499 if (m_faceOffset[2] > -1) {
1500 #pragma omp for nowait
1501 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1502 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1503 for (index_t i=0; i < numComp; ++i) {
1504 int_local[i]+=f[i]*m_dx[0];
1505 }
1506 }
1507 }
1508
1509 if (m_faceOffset[3] > -1) {
1510 #pragma omp for nowait
1511 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1512 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1513 for (index_t i=0; i < numComp; ++i) {
1514 int_local[i]+=f[i]*m_dx[0];
1515 }
1516 }
1517 }
1518
1519 #pragma omp critical
1520 for (index_t i=0; i<numComp; i++)
1521 integrals[i]+=int_local[i];
1522 } // end of parallel section
1523 } // function space selector
1524 }
1525
1526 //protected
1527 IndexVector Rectangle::getDiagonalIndices(bool upperOnly) const
1528 {
1529 IndexVector ret;
1530 // only store non-negative indices if requested
1531 if (upperOnly)
1532 ret.resize(5);
1533 else
1534 ret.resize(9);
1535 const dim_t nDOF0 = getNumDOFInAxis(0);
1536 size_t idx = 0;
1537 for (int i1=-1; i1<2; i1++) {
1538 for (int i0=-1; i0<2; i0++) {
1539 const int index = i1*nDOF0 + i0;
1540 if (!upperOnly || index >= 0)
1541 ret[idx++] = index;
1542 }
1543 }
1544
1545 return ret;
1546 }
1547
1548 //protected
1549 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1550 {
1551 const dim_t numComp = in.getDataPointSize();
1552 out.requireWrite();
1553
1554 const index_t left = getFirstInDim(0);
1555 const index_t bottom = getFirstInDim(1);
1556 const dim_t nDOF0 = getNumDOFInAxis(0);
1557 const dim_t nDOF1 = getNumDOFInAxis(1);
1558 #pragma omp parallel for
1559 for (index_t i=0; i<nDOF1; i++) {
1560 for (index_t j=0; j<nDOF0; j++) {
1561 const index_t n=j+left+(i+bottom)*m_NN[0];
1562 const double* src=in.getSampleDataRO(n);
1563 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1564 }
1565 }
1566 }
1567
1568 //protected
1569 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1570 {
1571 const dim_t numComp = in.getDataPointSize();
1572 paso::Coupler_ptr coupler(new paso::Coupler(m_connector, numComp));
1573 // expand data object if necessary to be able to grab the whole data
1574 const_cast<escript::Data*>(&in)->expand();
1575 coupler->startCollect(in.getDataRO());
1576
1577 const dim_t numDOF = getNumDOF();
1578 const dim_t numNodes = getNumNodes();
1579 out.requireWrite();
1580 const double* buffer = coupler->finishCollect();
1581
1582 #pragma omp parallel for
1583 for (index_t i=0; i < numNodes; i++) {
1584 const double* src=(m_dofMap[i]<numDOF ?
1585 in.getSampleDataRO(m_dofMap[i])
1586 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1587 copy(src, src+numComp, out.getSampleDataRW(i));
1588 }
1589 }
1590
1591 #ifdef ESYS_HAVE_TRILINOS
1592 //protected
1593 esys_trilinos::const_TrilinosGraph_ptr Rectangle::getTrilinosGraph() const
1594 {
1595 if (m_graph.is_null()) {
1596 m_graph = createTrilinosGraph(m_dofId, m_nodeId);
1597 }
1598 return m_graph;
1599 }
1600 #endif
1601
1602 //protected
1603 paso::SystemMatrixPattern_ptr Rectangle::getPasoMatrixPattern(
1604 bool reducedRowOrder,
1605 bool reducedColOrder) const
1606 {
1607 if (m_pattern.get())
1608 return m_pattern;
1609
1610 // first call - create pattern, then return
1611 const dim_t numDOF = getNumDOF();
1612 const dim_t numShared = m_connector->send->numSharedComponents;
1613 const dim_t numNeighbours = m_connector->send->numNeighbors;
1614 const index_t* offsetInShared = m_connector->send->offsetInShared;
1615 const index_t* sendShared = m_connector->send->shared;
1616
1617 // these are for the couple blocks
1618 vector<IndexVector> colIndices(numDOF);
1619 vector<IndexVector> rowIndices(numShared);
1620
1621 for (dim_t i=0; i<numNeighbours; i++) {
1622 const dim_t start = offsetInShared[i];
1623 const dim_t end = offsetInShared[i+1];
1624 for (dim_t j = start; j < end; j++) {
1625 if (j > start)
1626 doublyLink(colIndices, rowIndices, sendShared[j-1], j);
1627 doublyLink(colIndices, rowIndices, sendShared[j], j);
1628 if (j < end-1)
1629 doublyLink(colIndices, rowIndices, sendShared[j+1], j);
1630 }
1631 }
1632 #pragma omp parallel for
1633 for (dim_t i = 0; i < numShared; i++) {
1634 sort(rowIndices[i].begin(), rowIndices[i].end());
1635 }
1636
1637 // create main and couple blocks
1638 paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);
1639 paso::Pattern_ptr colPattern = createPasoPattern(colIndices, numShared);
1640 paso::Pattern_ptr rowPattern = createPasoPattern(rowIndices, numDOF);
1641
1642 // allocate paso distribution
1643 paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,
1644 m_nodeDistribution, 1, 0));
1645
1646 // finally create the system matrix pattern
1647 m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
1648 distribution, distribution, mainPattern, colPattern, rowPattern,
1649 m_connector, m_connector));
1650 return m_pattern;
1651 }
1652
1653 //private
1654 void Rectangle::populateSampleIds()
1655 {
1656 // degrees of freedom are numbered from left to right, bottom to top in
1657 // each rank, continuing on the next rank (ranks also go left-right,
1658 // bottom-top).
1659 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1660 // helps when writing out data rank after rank.
1661
1662 // build node distribution vector first.
1663 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1664 // constant for all ranks in this implementation
1665 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1666 const dim_t numDOF=getNumDOF();
1667 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1668 m_nodeDistribution[k]=k*numDOF;
1669 }
1670 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1671 try {
1672 m_nodeId.resize(getNumNodes());
1673 m_dofId.resize(numDOF);
1674 m_elementId.resize(getNumElements());
1675 } catch (const std::length_error& le) {
1676 throw RipleyException("The system does not have sufficient memory for a domain of this size.");
1677 }
1678
1679 // populate face element counts
1680 //left
1681 if (m_offset[0]==0)
1682 m_faceCount[0]=m_NE[1];
1683 else
1684 m_faceCount[0]=0;
1685 //right
1686 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1687 m_faceCount[1]=m_NE[1];
1688 else
1689 m_faceCount[1]=0;
1690 //bottom
1691 if (m_offset[1]==0)
1692 m_faceCount[2]=m_NE[0];
1693 else
1694 m_faceCount[2]=0;
1695 //top
1696 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1697 m_faceCount[3]=m_NE[0];
1698 else
1699 m_faceCount[3]=0;
1700
1701 const dim_t NFE = getNumFaceElements();
1702 m_faceId.resize(NFE);
1703
1704 const index_t left = getFirstInDim(0);
1705 const index_t bottom = getFirstInDim(1);
1706 const dim_t nDOF0 = getNumDOFInAxis(0);
1707 const dim_t nDOF1 = getNumDOFInAxis(1);
1708 const dim_t NE0 = m_NE[0];
1709 const dim_t NE1 = m_NE[1];
1710
1711 #define globalNodeId(x,y) \
1712 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1713 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1714
1715 // set corner id's outside the parallel region
1716 m_nodeId[0] = globalNodeId(0, 0);
1717 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1718 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1719 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1720 #undef globalNodeId
1721
1722 #pragma omp parallel
1723 {
1724 // populate degrees of freedom and own nodes (identical id)
1725 #pragma omp for nowait
1726 for (dim_t i=0; i<nDOF1; i++) {
1727 for (dim_t j=0; j<nDOF0; j++) {
1728 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1729 const index_t dofIdx=j+i*nDOF0;
1730 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1731 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1732 }
1733 }
1734
1735 // populate the rest of the nodes (shared with other ranks)
1736 if (m_faceCount[0]==0) { // left column
1737 #pragma omp for nowait
1738 for (dim_t i=0; i<nDOF1; i++) {
1739 const index_t nodeIdx=(i+bottom)*m_NN[0];
1740 const index_t dofId=(i+1)*nDOF0-1;
1741 m_nodeId[nodeIdx]
1742 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1743 }
1744 }
1745 if (m_faceCount[1]==0) { // right column
1746 #pragma omp for nowait
1747 for (dim_t i=0; i<nDOF1; i++) {
1748 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1749 const index_t dofId=i*nDOF0;
1750 m_nodeId[nodeIdx]
1751 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1752 }
1753 }
1754 if (m_faceCount[2]==0) { // bottom row
1755 #pragma omp for nowait
1756 for (dim_t i=0; i<nDOF0; i++) {
1757 const index_t nodeIdx=i+left;
1758 const index_t dofId=nDOF0*(nDOF1-1)+i;
1759 m_nodeId[nodeIdx]
1760 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1761 }
1762 }
1763 if (m_faceCount[3]==0) { // top row
1764 #pragma omp for nowait
1765 for (dim_t i=0; i<nDOF0; i++) {
1766 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1767 const index_t dofId=i;
1768 m_nodeId[nodeIdx]
1769 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1770 }
1771 }
1772
1773 // populate element id's
1774 #pragma omp for nowait
1775 for (dim_t i1=0; i1<NE1; i1++) {
1776 for (dim_t i0=0; i0<NE0; i0++) {
1777 m_elementId[i0+i1*NE0]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1778 }
1779 }
1780
1781 // face elements
1782 #pragma omp for
1783 for (dim_t k=0; k<NFE; k++)
1784 m_faceId[k]=k;
1785 } // end parallel section
1786
1787 m_nodeTags.assign(getNumNodes(), 0);
1788 updateTagsInUse(Nodes);
1789
1790 m_elementTags.assign(getNumElements(), 0);
1791 updateTagsInUse(Elements);
1792
1793 // generate face offset vector and set face tags
1794 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1795 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1796 m_faceOffset.assign(4, -1);
1797 m_faceTags.clear();
1798 index_t offset=0;
1799 for (size_t i=0; i<4; i++) {
1800 if (m_faceCount[i]>0) {
1801 m_faceOffset[i]=offset;
1802 offset+=m_faceCount[i];
1803 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1804 }
1805 }
1806 setTagMap("left", LEFT);
1807 setTagMap("right", RIGHT);
1808 setTagMap("bottom", BOTTOM);
1809 setTagMap("top", TOP);
1810 updateTagsInUse(FaceElements);
1811
1812 populateDofMap();
1813 }
1814
1815 //private
1816 vector<IndexVector> Rectangle::getConnections(bool includeShared) const
1817 {
1818 // returns a vector v of size numDOF where v[i] is a vector with indices
1819 // of DOFs connected to i (up to 9 in 2D).
1820 // In other words this method returns the occupied (local) matrix columns
1821 // for all (local) matrix rows.
1822 // If includeShared==true then connections to non-owned DOFs are also
1823 // returned (i.e. indices of the column couplings)
1824 const dim_t nDOF0 = getNumDOFInAxis(0);
1825 const dim_t nDOF1 = getNumDOFInAxis(1);
1826 const dim_t numMatrixRows = nDOF0*nDOF1;
1827 vector<IndexVector> indices(numMatrixRows);
1828
1829 if (includeShared) {
1830 const index_t left = getFirstInDim(0);
1831 const index_t bottom = getFirstInDim(1);
1832 const dim_t NN0 = m_NN[0];
1833 const dim_t NN1 = m_NN[1];
1834 #pragma omp parallel for
1835 for (index_t i=0; i < numMatrixRows; i++) {
1836 const index_t x = left + i % nDOF0;
1837 const index_t y = bottom + i / nDOF0;
1838 // loop through potential neighbours and add to index if positions
1839 // are within bounds
1840 for (dim_t i1=y-1; i1<y+2; i1++) {
1841 for (dim_t i0=x-1; i0<x+2; i0++) {
1842 if (i0>=0 && i1>=0 && i0<NN0 && i1<NN1) {
1843 indices[i].push_back(m_dofMap[i1*NN0 + i0]);
1844 }
1845 }
1846 }
1847 sort(indices[i].begin(), indices[i].end());
1848 }
1849 } else {
1850 #pragma omp parallel for
1851 for (index_t i=0; i < numMatrixRows; i++) {
1852 const index_t x = i % nDOF0;
1853 const index_t y = i / nDOF0;
1854 // loop through potential neighbours and add to index if positions
1855 // are within bounds
1856 for (dim_t i1=y-1; i1<y+2; i1++) {
1857 for (dim_t i0=x-1; i0<x+2; i0++) {
1858 if (i0>=0 && i1>=0 && i0<nDOF0 && i1<nDOF1) {
1859 indices[i].push_back(i1*nDOF0 + i0);
1860 }
1861 }
1862 }
1863 }
1864 }
1865 return indices;
1866 }
1867
1868 //private
1869 void Rectangle::populateDofMap()
1870 {
1871 const dim_t nDOF0 = getNumDOFInAxis(0);
1872 const dim_t nDOF1 = getNumDOFInAxis(1);
1873 const index_t left = getFirstInDim(0);
1874 const index_t bottom = getFirstInDim(1);
1875
1876 // populate node->DOF mapping with own degrees of freedom.
1877 // The rest is assigned in the loop further down
1878 m_dofMap.assign(getNumNodes(), 0);
1879 #pragma omp parallel for
1880 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1881 for (index_t j=left; j<left+nDOF0; j++) {
1882 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1883 }
1884 }
1885
1886 // build list of shared components and neighbours by looping through
1887 // all potential neighbouring ranks and checking if positions are
1888 // within bounds
1889 const dim_t numDOF=nDOF0*nDOF1;
1890 RankVector neighbour;
1891 IndexVector offsetInShared(1,0);
1892 IndexVector sendShared, recvShared;
1893 const int x=m_mpiInfo->rank%m_NX[0];
1894 const int y=m_mpiInfo->rank/m_NX[0];
1895 // numShared will contain the number of shared DOFs after the following
1896 // blocks
1897 dim_t numShared=0;
1898 // sharing bottom edge
1899 if (y > 0) {
1900 neighbour.push_back((y-1)*m_NX[0] + x);
1901 const dim_t num = nDOF0;
1902 offsetInShared.push_back(offsetInShared.back()+num);
1903 for (dim_t i=0; i<num; i++, numShared++) {
1904 sendShared.push_back(i);
1905 recvShared.push_back(numDOF+numShared);
1906 m_dofMap[left+i]=numDOF+numShared;
1907 }
1908 }
1909 // sharing top edge
1910 if (y < m_NX[1] - 1) {
1911 neighbour.push_back((y+1)*m_NX[0] + x);
1912 const dim_t num = nDOF0;
1913 offsetInShared.push_back(offsetInShared.back()+num);
1914 for (dim_t i=0; i<num; i++, numShared++) {
1915 sendShared.push_back(numDOF-num+i);
1916 recvShared.push_back(numDOF+numShared);
1917 m_dofMap[m_NN[0]*(m_NN[1]-1)+left+i]=numDOF+numShared;
1918 }
1919 }
1920 // sharing left edge
1921 if (x > 0) {
1922 neighbour.push_back(y*m_NX[0] + x-1);
1923 const dim_t num = nDOF1;
1924 offsetInShared.push_back(offsetInShared.back()+num);
1925 for (dim_t i=0; i<num; i++, numShared++) {
1926 sendShared.push_back(i*nDOF0);
1927 recvShared.push_back(numDOF+numShared);
1928 m_dofMap[(bottom+i)*m_NN[0]]=numDOF+numShared;
1929 }
1930 }
1931 // sharing right edge
1932 if (x < m_NX[0] - 1) {
1933 neighbour.push_back(y*m_NX[0] + x+1);
1934 const dim_t num = nDOF1;
1935 offsetInShared.push_back(offsetInShared.back()+num);
1936 for (dim_t i=0; i<num; i++, numShared++) {
1937 sendShared.push_back((i+1)*nDOF0-1);
1938 recvShared.push_back(numDOF+numShared);
1939 m_dofMap[(bottom+1+i)*m_NN[0]-1]=numDOF+numShared;
1940 }
1941 }
1942 // sharing bottom-left node
1943 if (x > 0 && y > 0) {
1944 neighbour.push_back((y-1)*m_NX[0] + x-1);
1945 // sharing a node
1946 offsetInShared.push_back(offsetInShared.back()+1);
1947 sendShared.push_back(0);
1948 recvShared.push_back(numDOF+numShared);
1949 m_dofMap[0]=numDOF+numShared;
1950 ++numShared;
1951 }
1952 // sharing top-left node
1953 if (x > 0 && y < m_NX[1]-1) {
1954 neighbour.push_back((y+1)*m_NX[0] + x-1);
1955 offsetInShared.push_back(offsetInShared.back()+1);
1956 sendShared.push_back(numDOF-nDOF0);
1957 recvShared.push_back(numDOF+numShared);
1958 m_dofMap[m_NN[0]*(m_NN[1]-1)]=numDOF+numShared;
1959 ++numShared;
1960 }
1961 // sharing bottom-right node
1962 if (x < m_NX[0]-1 && y > 0) {
1963 neighbour.push_back((y-1)*m_NX[0] + x+1);
1964 offsetInShared.push_back(offsetInShared.back()+1);
1965 sendShared.push_back(nDOF0-1);
1966 recvShared.push_back(numDOF+numShared);
1967 m_dofMap[m_NN[0]-1]=numDOF+numShared;
1968 ++numShared;
1969 }
1970 // sharing top-right node
1971 if (x < m_NX[0]-1 && y < m_NX[1]-1) {
1972 neighbour.push_back((y+1)*m_NX[0] + x+1);
1973 offsetInShared.push_back(offsetInShared.back()+1);
1974 sendShared.push_back(numDOF-1);
1975 recvShared.push_back(numDOF+numShared);
1976 m_dofMap[m_NN[0]*m_NN[1]-1]=numDOF+numShared;
1977 ++numShared;
1978 }
1979
1980 // TODO: paso::SharedComponents should take vectors to avoid this
1981 int* neighPtr = NULL;
1982 index_t* sendPtr = NULL;
1983 index_t* recvPtr = NULL;
1984 if (neighbour.size() > 0) {
1985 neighPtr = &neighbour[0];
1986 sendPtr = &sendShared[0];
1987 recvPtr = &recvShared[0];
1988 }
1989
1990 // create connector
1991 paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
1992 numDOF, neighbour.size(), neighPtr, sendPtr,
1993 &offsetInShared[0], 1, 0, m_mpiInfo));
1994 paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
1995 numDOF, neighbour.size(), neighPtr, recvPtr,
1996 &offsetInShared[0], 1, 0, m_mpiInfo));
1997 m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
1998
1999 // useful debug output
2000 /*
2001 std::cout << "--- rcv_shcomp ---" << std::endl;
2002 std::cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << std::endl;
2003 for (size_t i=0; i<neighbour.size(); i++) {
2004 std::cout << "neighbor[" << i << "]=" << neighbour[i]
2005 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << std::endl;
2006 }
2007 for (size_t i=0; i<recvShared.size(); i++) {
2008 std::cout << "shared[" << i << "]=" << recvShared[i] << std::endl;
2009 }
2010 std::cout << "--- snd_shcomp ---" << std::endl;
2011 for (size_t i=0; i<sendShared.size(); i++) {
2012 std::cout << "shared[" << i << "]=" << sendShared[i] << std::endl;
2013 }
2014 std::cout << "--- dofMap ---" << std::endl;
2015 for (size_t i=0; i<m_dofMap.size(); i++) {
2016 std::cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << std::endl;
2017 }
2018 */
2019 }
2020
2021 //private
2022 void Rectangle::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
2023 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
2024 bool addF, index_t firstNode, int nEq, int nComp) const
2025 {
2026 IndexVector rowIndex(4);
2027 rowIndex[0] = m_dofMap[firstNode];
2028 rowIndex[1] = m_dofMap[firstNode+1];
2029 rowIndex[2] = m_dofMap[firstNode+m_NN[0]];
2030 rowIndex[3] = m_dofMap[firstNode+m_NN[0]+1];
2031 if (addF) {
2032 double *F_p=F.getSampleDataRW(0);
2033 for (index_t i=0; i<rowIndex.size(); i++) {
2034 if (rowIndex[i]<getNumDOF()) {
2035 for (int eq=0; eq<nEq; eq++) {
2036 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
2037 }
2038 }
2039 }
2040 }
2041 if (addS) {
2042 addToSystemMatrix(S, rowIndex, nEq, EM_S);
2043 }
2044 }
2045
2046 //protected
2047 void Rectangle::interpolateNodesOnElements(escript::Data& out,
2048 const escript::Data& in,
2049 bool reduced) const
2050 {
2051 const dim_t numComp = in.getDataPointSize();
2052 const dim_t NE0 = m_NE[0];
2053 const dim_t NE1 = m_NE[1];
2054 if (reduced) {
2055 out.requireWrite();
2056 const double c0 = 0.25;
2057 #pragma omp parallel
2058 {
2059 vector<double> f_00(numComp);
2060 vector<double> f_01(numComp);
2061 vector<double> f_10(numComp);
2062 vector<double> f_11(numComp);
2063 #pragma omp for
2064 for (index_t k1=0; k1 < NE1; ++k1) {
2065 for (index_t k0=0; k0 < NE0; ++k0) {
2066 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
2067 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
2068 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
2069 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
2070 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
2071 for (index_t i=0; i < numComp; ++i) {
2072 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
2073 } /* end of component loop i */
2074 } /* end of k0 loop */
2075 } /* end of k1 loop */
2076 } /* end of parallel section */
2077 } else {
2078 out.requireWrite();
2079 const double c0 = 0.16666666666666666667;
2080 const double c1 = 0.044658198738520451079;
2081 const double c2 = 0.62200846792814621559;
2082 #pragma omp parallel
2083 {
2084 vector<double> f_00(numComp);
2085 vector<double> f_01(numComp);
2086 vector<double> f_10(numComp);
2087 vector<double> f_11(numComp);
2088 #pragma omp for
2089 for (index_t k1=0; k1 < NE1; ++k1) {
2090 for (index_t k0=0; k0 < NE0; ++k0) {
2091 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
2092 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
2093 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
2094 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
2095 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
2096 for (index_t i=0; i < numComp; ++i) {
2097 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
2098 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
2099 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
2100 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
2101 } /* end of component loop i */
2102 } /* end of k0 loop */
2103 } /* end of k1 loop */
2104 } /* end of parallel section */
2105 }
2106 }
2107
2108 //protected
2109 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
2110 const escript::Data& in,
2111 bool reduced) const
2112 {
2113 const dim_t numComp = in.getDataPointSize();
2114 const dim_t NE0 = m_NE[0];
2115 const dim_t NE1 = m_NE[1];
2116 if (reduced) {
2117 out.requireWrite();
2118 #pragma omp parallel
2119 {
2120 vector<double> f_00(numComp);
2121 vector<double> f_01(numComp);
2122 vector<double> f_10(numComp);
2123 vector<double> f_11(numComp);
2124 if (m_faceOffset[0] > -1) {
2125 #pragma omp for nowait
2126 for (index_t k1=0; k1 < NE1; ++k1) {
2127 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
2128 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
2129 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
2130 for (index_t i=0; i < numComp; ++i) {
2131 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
2132 } /* end of component loop i */
2133 } /* end of k1 loop */
2134 } /* end of face 0 */
2135 if (m_faceOffset[1] > -1) {
2136 #pragma omp for nowait
2137 for (index_t k1=0; k1 < NE1; ++k1) {
2138 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
2139 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
2140 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
2141 for (index_t i=0; i < numComp; ++i) {
2142 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
2143 } /* end of component loop i */
2144 } /* end of k1 loop */
2145 } /* end of face 1 */
2146 if (m_faceOffset[2] > -1) {
2147 #pragma omp for nowait
2148 for (index_t k0=0; k0 < NE0; ++k0) {
2149 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
2150 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
2151 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
2152 for (index_t i=0; i < numComp; ++i) {
2153 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
2154 } /* end of component loop i */
2155 } /* end of k0 loop */
2156 } /* end of face 2 */
2157 if (m_faceOffset[3] > -1) {
2158 #pragma omp for nowait
2159 for (index_t k0=0; k0 < NE0; ++k0) {
2160 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2161 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2162 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
2163 for (index_t i=0; i < numComp; ++i) {
2164 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
2165 } /* end of component loop i */
2166 } /* end of k0 loop */
2167 } /* end of face 3 */
2168 } /* end of parallel section */
2169 } else {
2170 out.requireWrite();
2171 const double c0 = 0.21132486540518711775;
2172 const double c1 = 0.78867513459481288225;
2173 #pragma omp parallel
2174 {
2175 vector<double> f_00(numComp);
2176 vector<double> f_01(numComp);
2177 vector<double> f_10(numComp);
2178 vector<double> f_11(numComp);
2179 if (m_faceOffset[0] > -1) {
2180 #pragma omp for nowait
2181 for (index_t k1=0; k1 < NE1; ++k1) {
2182 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
2183 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
2184 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
2185 for (index_t i=0; i < numComp; ++i) {
2186 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
2187 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
2188 } /* end of component loop i */
2189 } /* end of k1 loop */
2190 } /* end of face 0 */
2191 if (m_faceOffset[1] > -1) {
2192 #pragma omp for nowait
2193 for (index_t k1=0; k1 < NE1; ++k1) {
2194 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
2195 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
2196 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
2197 for (index_t i=0; i < numComp; ++i) {
2198 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
2199 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
2200 } /* end of component loop i */
2201 } /* end of k1 loop */
2202 } /* end of face 1 */
2203 if (m_faceOffset[2] > -1) {
2204 #pragma omp for nowait
2205 for (index_t k0=0; k0 < NE0; ++k0) {
2206 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
2207 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
2208 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
2209 for (index_t i=0; i < numComp; ++i) {
2210 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
2211 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
2212 } /* end of component loop i */
2213 } /* end of k0 loop */
2214 } /* end of face 2 */
2215 if (m_faceOffset[3] > -1) {
2216 #pragma omp for nowait
2217 for (index_t k0=0; k0 < NE0; ++k0) {
2218 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2219 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2220 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
2221 for (index_t i=0; i < numComp; ++i) {
2222 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
2223 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
2224 } /* end of component loop i */
2225 } /* end of k0 loop */
2226 } /* end of face 3 */
2227 } /* end of parallel section */
2228 }
2229 }
2230
2231
2232
2233 namespace
2234 {
2235 // Calculates a Gaussian blur convolution matrix for 2D
2236 // See wiki article on the subject
2237 double* get2DGauss(unsigned radius, double sigma)
2238 {
2239 double* arr = new double[(radius*2+1)*(radius*2+1)];
2240 const double common = M_1_PI * 0.5 / (sigma*sigma);
2241 const int r = static_cast<int>(radius);
2242 double total = 0;
2243 for (int y = -r; y <= r; ++y) {
2244 for (int x = -r; x <= r; ++x) {
2245 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
2246 total+=arr[(x+r)+(y+r)*(r*2+1)];
2247 }
2248 }
2249 const double invtotal = 1/total;
2250 for (size_t p=0; p<(radius*2+1)*(radius*2+1); ++p) {
2251 arr[p] *= invtotal;
2252 }
2253 return arr;
2254 }
2255
2256 // applies conv to source to get a point.
2257 // (xp, yp) are the coords in the source matrix not the destination matrix
2258 double Convolve2D(double* conv, double* source, size_t xp, size_t yp,
2259 unsigned radius, size_t width)
2260 {
2261 const size_t bx = xp-radius, by=yp-radius;
2262 const size_t sbase = bx+by*width;
2263 double result = 0;
2264 for (int y=0; y<2*radius+1; ++y) {
2265 for (int x=0; x<2*radius+1; ++x) {
2266 result += conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
2267 }
2268 }
2269 return result;
2270 }
2271 }
2272
2273 /* This is a wrapper for filtered (and non-filtered) randoms
2274 * For detailed doco see randomFillWorker
2275 */
2276 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
2277 const escript::FunctionSpace& what, long seed,
2278 const bp::tuple& filter) const
2279 {
2280 int numvals=escript::DataTypes::noValues(shape);
2281 if (len(filter) > 0 && numvals != 1)
2282 throw NotImplementedError("Ripley only supports filters for scalar data.");
2283
2284 escript::Data res = randomFillWorker(shape, seed, filter);
2285 if (res.getFunctionSpace() != what) {
2286 escript::Data r(res, what);
2287 return r;
2288 }
2289 return res;
2290 }
2291
2292
2293 /* This routine produces a Data object filled with smoothed random data.
2294 * The dimensions of the rectangle being filled are internal[0] x internal[1]
2295 * points. A parameter radius gives the size of the stencil used for the
2296 * smoothing. A point on the left hand edge for example, will still require
2297 * `radius` extra points to the left in order to complete the stencil.
2298 *
2299 * All local calculation is done on an array called `src`, with
2300 * dimensions = ext[0] * ext[1], where ext[i]= internal[i]+2*radius.
2301 *
2302 * Now for MPI there is overlap to deal with. We need to share both the
2303 * overlapping values themselves but also the external region.
2304 *
2305 * In a hypothetical 1-D case:
2306 *
2307 * 1234567 would be split into two ranks thus:
2308 * 123(4) (4)567 [4 being a shared element]
2309 *
2310 * If the radius is 2. There will be padding elements on the outside:
2311 * pp123(4) (4)567pp
2312 *
2313 * To ensure that 4 can be correctly computed on both ranks, values from the
2314 * other rank need to be known.
2315 *
2316 * pp123(4)56 23(4)567pp
2317 *
2318 * Now in our case, we set all the values 23456 on the left rank and send them
2319 * to the right hand rank.
2320 *
2321 * So the edges _may_ need to be shared at a distance `inset` from all
2322 * boundaries.
2323 *
2324 * inset=2*radius+1
2325 * This is to ensure that values at distance `radius` from the
2326 * shared/overlapped element that ripley has.
2327 */
2328 escript::Data Rectangle::randomFillWorker(
2329 const escript::DataTypes::ShapeType& shape, long seed,
2330 const bp::tuple& filter) const
2331 {
2332 unsigned int radius=0; // these are only used by gaussian
2333 double sigma=0.5;
2334
2335 unsigned int numvals=escript::DataTypes::noValues(shape);
2336
2337 if (len(filter) == 0) {
2338 // nothing special required here yet
2339 } else if (len(filter) == 3) {
2340 bp::extract<string> ex(filter[0]);
2341 if (!ex.check() || (ex()!="gaussian")) {
2342 throw ValueError("Unsupported random filter");
2343 }
2344 bp::extract<unsigned int> ex1(filter[1]);
2345 if (!ex1.check()) {
2346 throw ValueError("Radius of Gaussian filter must be a positive integer.");
2347 }
2348 radius = ex1();
2349 sigma = 0.5;
2350 bp::extract<double> ex2(filter[2]);
2351 if (!ex2.check() || (sigma=ex2()) <= 0) {
2352 throw ValueError("Sigma must be a positive floating point number.");
2353 }
2354 } else {
2355 throw ValueError("Unsupported random filter for Rectangle.");
2356 }
2357
2358 // number of points in the internal region
2359 // that is, the ones we need smoothed versions of
2360 const dim_t internal[2] = { m_NN[0], m_NN[1] };
2361 size_t ext[2];
2362 ext[0]=(size_t)internal[0]+2*radius; // includes points we need as input
2363 ext[1]=(size_t)internal[1]+2*radius; // for smoothing
2364
2365 // now we check to see if the radius is acceptable
2366 // That is, would not cross multiple ranks in MPI
2367
2368 if (2*radius >= internal[0]-4) {
2369 throw ValueError("Radius of gaussian filter is too large for X dimension of a rank");
2370 }
2371 if (2*radius >= internal[1]-4) {
2372 throw ValueError("Radius of gaussian filter is too large for Y dimension of a rank");
2373 }
2374
2375 double* src = new double[ext[0]*ext[1]*numvals];
2376 escript::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2377
2378 #ifdef ESYS_MPI
2379 if ((internal[0] < 5) || (internal[1] < 5)) {
2380 // since the dimensions are equal for all ranks, this exception
2381 // will be thrown on all ranks
2382 throw RipleyException("Random Data in Ripley requires at least five elements per side per rank.");
2383 }
2384 dim_t X = m_mpiInfo->rank%m_NX[0];
2385 dim_t Y = m_mpiInfo->rank/m_NX[0];
2386 #endif
2387
2388 /*
2389 // if we wanted to test a repeating pattern
2390 size_t basex=0;
2391 size_t basey=0;
2392 #ifdef ESYS_MPI
2393 basex=X*m_gNE[0]/m_NX[0];
2394 basey=Y*m_gNE[1]/m_NX[1];
2395 #endif
2396
2397 escript::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2398 */
2399
2400 #ifdef ESYS_MPI
2401 BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2402 // it's +2 not +1 because a whole element is shared (and hence there is
2403 // an overlap of two points both of which need to have "radius" points on
2404 // either side.
2405 size_t inset=2*radius+2;
2406
2407 // how wide is the x-dimension between the two insets
2408 size_t xmidlen=ext[0]-2*inset;
2409 size_t ymidlen=ext[1]-2*inset;
2410
2411 Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2412
2413 // a non-tight upper bound on how many we need
2414 MPI_Request reqs[40];
2415 MPI_Status stats[40];
2416 short rused=0;
2417
2418 messvec incoms;
2419 messvec outcoms;
2420
2421 grid.generateInNeighbours(X, Y, incoms);
2422 grid.generateOutNeighbours(X, Y, outcoms);
2423
2424 block.copyAllToBuffer(src);
2425
2426 int comserr = 0;
2427 for (size_t