/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4851 - (show annotations)
Wed Apr 9 03:30:36 2014 UTC (5 years ago) by sshaw
File size: 93752 byte(s)
added exceptions to ripley for more informative deaths to negative element counts
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <algorithm>
18
19 #include <ripley/Rectangle.h>
20 #include <paso/SystemMatrix.h>
21 #include <esysUtils/esysFileWriter.h>
22 #include <ripley/DefaultAssembler2D.h>
23 #include <ripley/WaveAssembler2D.h>
24 #include <ripley/LameAssembler2D.h>
25 #include <ripley/domainhelpers.h>
26 #include <boost/scoped_array.hpp>
27 #include "esysUtils/EsysRandom.h"
28 #include "blocktools.h"
29
30 #ifdef USE_NETCDF
31 #include <netcdfcpp.h>
32 #endif
33
34 #if USE_SILO
35 #include <silo.h>
36 #ifdef ESYS_MPI
37 #include <pmpio.h>
38 #endif
39 #endif
40
41 #include <iomanip>
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
49 double y1, int d0, int d1,
50 const std::vector<double>& points,
51 const std::vector<int>& tags,
52 const simap_t& tagnamestonums) :
53 RipleyDomain(2)
54 {
55 if (n0 <= 0 || n1 <= 0)
56 throw RipleyException("Number of elements in each spatial dimension "
57 "must be positive");
58
59 // ignore subdivision parameters for serial run
60 if (m_mpiInfo->size == 1) {
61 d0=1;
62 d1=1;
63 }
64
65 bool warn=false;
66 std::vector<int> factors;
67 int ranks = m_mpiInfo->size;
68 int epr[2] = {n0,n1};
69 int d[2] = {d0,d1};
70 if (d0<=0 || d1<=0) {
71 for (int i = 0; i < 2; i++) {
72 if (d[i] < 1) {
73 d[i] = 1;
74 continue;
75 }
76 epr[i] = -1; // can no longer be max
77 //remove
78 if (ranks % d[i] != 0) {
79 throw RipleyException("Invalid number of spatial subdivisions");
80 }
81 ranks /= d[i];
82 }
83 factorise(factors, ranks);
84 if (factors.size() != 0) {
85 warn = true;
86 }
87 }
88
89 while (factors.size() > 0) {
90 int i = epr[0] > epr[1] ? 0 : 1;
91 int f = factors.back();
92 factors.pop_back();
93 d[i] *= f;
94 epr[i] /= f;
95 }
96 d0 = d[0]; d1 = d[1];
97
98 // ensure number of subdivisions is valid and nodes can be distributed
99 // among number of ranks
100 if (d0*d1 != m_mpiInfo->size)
101 throw RipleyException("Invalid number of spatial subdivisions");
102
103 if (warn) {
104 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
105 << d1 << "). This may not be optimal!" << endl;
106 }
107
108 double l0 = x1-x0;
109 double l1 = y1-y0;
110 m_dx[0] = l0/n0;
111 m_dx[1] = l1/n1;
112
113 if ((n0+1)%d0 > 0) {
114 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
115 l0=m_dx[0]*n0;
116 cout << "Warning: Adjusted number of elements and length. N0="
117 << n0 << ", l0=" << l0 << endl;
118 }
119 if ((n1+1)%d1 > 0) {
120 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
121 l1=m_dx[1]*n1;
122 cout << "Warning: Adjusted number of elements and length. N1="
123 << n1 << ", l1=" << l1 << endl;
124 }
125
126 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
127 throw RipleyException("Too few elements for the number of ranks");
128
129 m_gNE[0] = n0;
130 m_gNE[1] = n1;
131 m_origin[0] = x0;
132 m_origin[1] = y0;
133 m_length[0] = l0;
134 m_length[1] = l1;
135 m_NX[0] = d0;
136 m_NX[1] = d1;
137
138 // local number of elements (with and without overlap)
139 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
140 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
141 m_NE[0]++;
142 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
143 m_ownNE[0]--;
144
145 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
146 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
147 m_NE[1]++;
148 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
149 m_ownNE[1]--;
150
151 // local number of nodes
152 m_NN[0] = m_NE[0]+1;
153 m_NN[1] = m_NE[1]+1;
154
155 // bottom-left node is at (offset0,offset1) in global mesh
156 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
157 if (m_offset[0] > 0)
158 m_offset[0]--;
159 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
160 if (m_offset[1] > 0)
161 m_offset[1]--;
162
163 populateSampleIds();
164 createPattern();
165 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
166 for (map<string, int>::const_iterator i = tagnamestonums.begin();
167 i != tagnamestonums.end(); i++) {
168 setTagMap(i->first, i->second);
169 }
170 addPoints(tags.size(), &points[0], &tags[0]);
171 }
172
173 Rectangle::~Rectangle()
174 {
175 delete assembler;
176 }
177
178 string Rectangle::getDescription() const
179 {
180 return "ripley::Rectangle";
181 }
182
183 bool Rectangle::operator==(const AbstractDomain& other) const
184 {
185 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
186 if (o) {
187 return (RipleyDomain::operator==(other) &&
188 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
189 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
190 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
191 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
192 }
193
194 return false;
195 }
196
197 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
198 const ReaderParameters& params) const
199 {
200 #ifdef USE_NETCDF
201 // check destination function space
202 int myN0, myN1;
203 if (out.getFunctionSpace().getTypeCode() == Nodes) {
204 myN0 = m_NN[0];
205 myN1 = m_NN[1];
206 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
207 out.getFunctionSpace().getTypeCode() == ReducedElements) {
208 myN0 = m_NE[0];
209 myN1 = m_NE[1];
210 } else
211 throw RipleyException("readNcGrid(): invalid function space for output data object");
212
213 if (params.first.size() != 2)
214 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
215
216 if (params.numValues.size() != 2)
217 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
218
219 if (params.multiplier.size() != 2)
220 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
221 for (size_t i=0; i<params.multiplier.size(); i++)
222 if (params.multiplier[i]<1)
223 throw RipleyException("readNcGrid(): all multipliers must be positive");
224 if (params.reverse.size() != 2)
225 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
226
227 // check file existence and size
228 NcFile f(filename.c_str(), NcFile::ReadOnly);
229 if (!f.is_valid())
230 throw RipleyException("readNcGrid(): cannot open file");
231
232 NcVar* var = f.get_var(varname.c_str());
233 if (!var)
234 throw RipleyException("readNcGrid(): invalid variable");
235
236 // TODO: rank>0 data support
237 const int numComp = out.getDataPointSize();
238 if (numComp > 1)
239 throw RipleyException("readNcGrid(): only scalar data supported");
240
241 const int dims = var->num_dims();
242 boost::scoped_array<long> edges(var->edges());
243
244 // is this a slice of the data object (dims!=2)?
245 // note the expected ordering of edges (as in numpy: y,x)
246 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
247 || (dims==1 && params.numValues[1]>1) ) {
248 throw RipleyException("readNcGrid(): not enough data in file");
249 }
250
251 // check if this rank contributes anything
252 if (params.first[0] >= m_offset[0]+myN0 ||
253 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
254 params.first[1] >= m_offset[1]+myN1 ||
255 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
256 return;
257
258 // now determine how much this rank has to write
259
260 // first coordinates in data object to write to
261 const int first0 = max(0, params.first[0]-m_offset[0]);
262 const int first1 = max(0, params.first[1]-m_offset[1]);
263 // indices to first value in file (not accounting for reverse yet)
264 int idx0 = max(0, m_offset[0]-params.first[0]);
265 int idx1 = max(0, m_offset[1]-params.first[1]);
266 // number of values to read
267 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
268 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
269
270 // make sure we read the right block if going backwards through file
271 if (params.reverse[0])
272 idx0 = edges[dims-1]-num0-idx0;
273 if (dims>1 && params.reverse[1])
274 idx1 = edges[dims-2]-num1-idx1;
275
276 vector<double> values(num0*num1);
277 if (dims==2) {
278 var->set_cur(idx1, idx0);
279 var->get(&values[0], num1, num0);
280 } else {
281 var->set_cur(idx0);
282 var->get(&values[0], num0);
283 }
284
285 const int dpp = out.getNumDataPointsPerSample();
286 out.requireWrite();
287
288 // helpers for reversing
289 const int x0 = (params.reverse[0] ? num0-1 : 0);
290 const int x_mult = (params.reverse[0] ? -1 : 1);
291 const int y0 = (params.reverse[1] ? num1-1 : 0);
292 const int y_mult = (params.reverse[1] ? -1 : 1);
293
294 for (index_t y=0; y<num1; y++) {
295 #pragma omp parallel for
296 for (index_t x=0; x<num0; x++) {
297 const int baseIndex = first0+x*params.multiplier[0]
298 +(first1+y*params.multiplier[1])*myN0;
299 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
300 if (!isnan(values[srcIndex])) {
301 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
302 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
303 const int dataIndex = baseIndex+m0+m1*myN0;
304 double* dest = out.getSampleDataRW(dataIndex);
305 for (index_t q=0; q<dpp; q++) {
306 *dest++ = values[srcIndex];
307 }
308 }
309 }
310 }
311 }
312 }
313 #else
314 throw RipleyException("readNcGrid(): not compiled with netCDF support");
315 #endif
316 }
317
318 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
319 const ReaderParameters& params) const
320 {
321 // the mapping is not universally correct but should work on our
322 // supported platforms
323 switch (params.dataType) {
324 case DATATYPE_INT32:
325 readBinaryGridImpl<int>(out, filename, params);
326 break;
327 case DATATYPE_FLOAT32:
328 readBinaryGridImpl<float>(out, filename, params);
329 break;
330 case DATATYPE_FLOAT64:
331 readBinaryGridImpl<double>(out, filename, params);
332 break;
333 default:
334 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
335 }
336 }
337
338 #ifdef USE_BOOSTIO
339 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
340 const ReaderParameters& params) const
341 {
342 // the mapping is not universally correct but should work on our
343 // supported platforms
344 switch (params.dataType) {
345 case DATATYPE_INT32:
346 readBinaryGridZippedImpl<int>(out, filename, params);
347 break;
348 case DATATYPE_FLOAT32:
349 readBinaryGridZippedImpl<float>(out, filename, params);
350 break;
351 case DATATYPE_FLOAT64:
352 readBinaryGridZippedImpl<double>(out, filename, params);
353 break;
354 default:
355 throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
356 }
357 }
358 #endif
359
360 template<typename ValueType>
361 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
362 const ReaderParameters& params) const
363 {
364 // check destination function space
365 int myN0, myN1;
366 if (out.getFunctionSpace().getTypeCode() == Nodes) {
367 myN0 = m_NN[0];
368 myN1 = m_NN[1];
369 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
370 out.getFunctionSpace().getTypeCode() == ReducedElements) {
371 myN0 = m_NE[0];
372 myN1 = m_NE[1];
373 } else
374 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
375
376 // check file existence and size
377 ifstream f(filename.c_str(), ifstream::binary);
378 if (f.fail()) {
379 throw RipleyException("readBinaryGrid(): cannot open file");
380 }
381 f.seekg(0, ios::end);
382 const int numComp = out.getDataPointSize();
383 const int filesize = f.tellg();
384 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
385 if (filesize < reqsize) {
386 f.close();
387 throw RipleyException("readBinaryGrid(): not enough data in file");
388 }
389
390 // check if this rank contributes anything
391 if (params.first[0] >= m_offset[0]+myN0 ||
392 params.first[0]+params.numValues[0] <= m_offset[0] ||
393 params.first[1] >= m_offset[1]+myN1 ||
394 params.first[1]+params.numValues[1] <= m_offset[1]) {
395 f.close();
396 return;
397 }
398
399 // now determine how much this rank has to write
400
401 // first coordinates in data object to write to
402 const int first0 = max(0, params.first[0]-m_offset[0]);
403 const int first1 = max(0, params.first[1]-m_offset[1]);
404 // indices to first value in file
405 const int idx0 = max(0, m_offset[0]-params.first[0]);
406 const int idx1 = max(0, m_offset[1]-params.first[1]);
407 // number of values to read
408 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
409 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
410
411 out.requireWrite();
412 vector<ValueType> values(num0*numComp);
413 const int dpp = out.getNumDataPointsPerSample();
414
415 for (int y=0; y<num1; y++) {
416 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
417 f.seekg(fileofs*sizeof(ValueType));
418 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
419 for (int x=0; x<num0; x++) {
420 const int baseIndex = first0+x*params.multiplier[0]
421 +(first1+y*params.multiplier[1])*myN0;
422 for (int m1=0; m1<params.multiplier[1]; m1++) {
423 for (int m0=0; m0<params.multiplier[0]; m0++) {
424 const int dataIndex = baseIndex+m0+m1*myN0;
425 double* dest = out.getSampleDataRW(dataIndex);
426 for (int c=0; c<numComp; c++) {
427 ValueType val = values[x*numComp+c];
428
429 if (params.byteOrder != BYTEORDER_NATIVE) {
430 char* cval = reinterpret_cast<char*>(&val);
431 // this will alter val!!
432 byte_swap32(cval);
433 }
434 if (!std::isnan(val)) {
435 for (int q=0; q<dpp; q++) {
436 *dest++ = static_cast<double>(val);
437 }
438 }
439 }
440 }
441 }
442 }
443 }
444
445 f.close();
446 }
447
448 #ifdef USE_BOOSTIO
449 template<typename ValueType>
450 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
451 const ReaderParameters& params) const
452 {
453 // check destination function space
454 int myN0, myN1;
455 if (out.getFunctionSpace().getTypeCode() == Nodes) {
456 myN0 = m_NN[0];
457 myN1 = m_NN[1];
458 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
459 out.getFunctionSpace().getTypeCode() == ReducedElements) {
460 myN0 = m_NE[0];
461 myN1 = m_NE[1];
462 } else
463 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
464
465 // check file existence and size
466 ifstream f(filename.c_str(), ifstream::binary);
467 if (f.fail()) {
468 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
469 }
470 f.seekg(0, ios::end);
471 const int numComp = out.getDataPointSize();
472 int filesize = f.tellg();
473 f.seekg(0, ios::beg);
474 std::vector<char> compressed(filesize);
475 f.read((char*)&compressed[0], filesize);
476 f.close();
477 std::vector<char> decompressed = unzip(compressed);
478 filesize = decompressed.size();
479 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
480 if (filesize < reqsize) {
481 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
482 }
483
484 // check if this rank contributes anything
485 if (params.first[0] >= m_offset[0]+myN0 ||
486 params.first[0]+params.numValues[0] <= m_offset[0] ||
487 params.first[1] >= m_offset[1]+myN1 ||
488 params.first[1]+params.numValues[1] <= m_offset[1]) {
489 f.close();
490 return;
491 }
492
493 // now determine how much this rank has to write
494
495 // first coordinates in data object to write to
496 const int first0 = max(0, params.first[0]-m_offset[0]);
497 const int first1 = max(0, params.first[1]-m_offset[1]);
498 // indices to first value in file
499 const int idx0 = max(0, m_offset[0]-params.first[0]);
500 const int idx1 = max(0, m_offset[1]-params.first[1]);
501 // number of values to read
502 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
503 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
504
505 out.requireWrite();
506 vector<ValueType> values(num0*numComp);
507 const int dpp = out.getNumDataPointsPerSample();
508
509 for (int y=0; y<num1; y++) {
510 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
511 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
512 for (int x=0; x<num0; x++) {
513 const int baseIndex = first0+x*params.multiplier[0]
514 +(first1+y*params.multiplier[1])*myN0;
515 for (int m1=0; m1<params.multiplier[1]; m1++) {
516 for (int m0=0; m0<params.multiplier[0]; m0++) {
517 const int dataIndex = baseIndex+m0+m1*myN0;
518 double* dest = out.getSampleDataRW(dataIndex);
519 for (int c=0; c<numComp; c++) {
520 ValueType val = values[x*numComp+c];
521
522 if (params.byteOrder != BYTEORDER_NATIVE) {
523 char* cval = reinterpret_cast<char*>(&val);
524 // this will alter val!!
525 byte_swap32(cval);
526 }
527 if (!std::isnan(val)) {
528 for (int q=0; q<dpp; q++) {
529 *dest++ = static_cast<double>(val);
530 }
531 }
532 }
533 }
534 }
535 }
536 }
537
538 f.close();
539 }
540 #endif
541
542 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
543 int byteOrder, int dataType) const
544 {
545 // the mapping is not universally correct but should work on our
546 // supported platforms
547 switch (dataType) {
548 case DATATYPE_INT32:
549 writeBinaryGridImpl<int>(in, filename, byteOrder);
550 break;
551 case DATATYPE_FLOAT32:
552 writeBinaryGridImpl<float>(in, filename, byteOrder);
553 break;
554 case DATATYPE_FLOAT64:
555 writeBinaryGridImpl<double>(in, filename, byteOrder);
556 break;
557 default:
558 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
559 }
560 }
561
562 template<typename ValueType>
563 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
564 const string& filename, int byteOrder) const
565 {
566 // check function space and determine number of points
567 int myN0, myN1;
568 int totalN0, totalN1;
569 if (in.getFunctionSpace().getTypeCode() == Nodes) {
570 myN0 = m_NN[0];
571 myN1 = m_NN[1];
572 totalN0 = m_gNE[0]+1;
573 totalN1 = m_gNE[1]+1;
574 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
575 in.getFunctionSpace().getTypeCode() == ReducedElements) {
576 myN0 = m_NE[0];
577 myN1 = m_NE[1];
578 totalN0 = m_gNE[0];
579 totalN1 = m_gNE[1];
580 } else
581 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
582
583 const int numComp = in.getDataPointSize();
584 const int dpp = in.getNumDataPointsPerSample();
585
586 if (numComp > 1 || dpp > 1)
587 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
588
589 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
590
591 // from here on we know that each sample consists of one value
592 FileWriter fw;
593 fw.openFile(filename, fileSize);
594 MPIBarrier();
595
596 for (index_t y=0; y<myN1; y++) {
597 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
598 ostringstream oss;
599
600 for (index_t x=0; x<myN0; x++) {
601 const double* sample = in.getSampleDataRO(y*myN0+x);
602 ValueType fvalue = static_cast<ValueType>(*sample);
603 if (byteOrder == BYTEORDER_NATIVE) {
604 oss.write((char*)&fvalue, sizeof(fvalue));
605 } else {
606 char* value = reinterpret_cast<char*>(&fvalue);
607 oss.write(byte_swap32(value), sizeof(fvalue));
608 }
609 }
610 fw.writeAt(oss, fileofs);
611 }
612 fw.close();
613 }
614
615 void Rectangle::dump(const string& fileName) const
616 {
617 #if USE_SILO
618 string fn(fileName);
619 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
620 fn+=".silo";
621 }
622
623 int driver=DB_HDF5;
624 DBfile* dbfile = NULL;
625 const char* blockDirFmt = "/block%04d";
626
627 #ifdef ESYS_MPI
628 PMPIO_baton_t* baton = NULL;
629 const int NUM_SILO_FILES = 1;
630 #endif
631
632 if (m_mpiInfo->size > 1) {
633 #ifdef ESYS_MPI
634 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
635 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
636 PMPIO_DefaultClose, (void*)&driver);
637 // try the fallback driver in case of error
638 if (!baton && driver != DB_PDB) {
639 driver = DB_PDB;
640 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
641 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
642 PMPIO_DefaultClose, (void*)&driver);
643 }
644 if (baton) {
645 char siloPath[64];
646 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
647 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
648 }
649 #endif
650 } else {
651 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
652 getDescription().c_str(), driver);
653 // try the fallback driver in case of error
654 if (!dbfile && driver != DB_PDB) {
655 driver = DB_PDB;
656 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
657 getDescription().c_str(), driver);
658 }
659 char siloPath[64];
660 snprintf(siloPath, 64, blockDirFmt, 0);
661 DBMkDir(dbfile, siloPath);
662 DBSetDir(dbfile, siloPath);
663 }
664
665 if (!dbfile)
666 throw RipleyException("dump: Could not create Silo file");
667
668 /*
669 if (driver==DB_HDF5) {
670 // gzip level 1 already provides good compression with minimal
671 // performance penalty. Some tests showed that gzip levels >3 performed
672 // rather badly on escript data both in terms of time and space
673 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
674 }
675 */
676
677 boost::scoped_ptr<double> x(new double[m_NN[0]]);
678 boost::scoped_ptr<double> y(new double[m_NN[1]]);
679 double* coords[2] = { x.get(), y.get() };
680 #pragma omp parallel
681 {
682 #pragma omp for nowait
683 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
684 coords[0][i0]=getLocalCoordinate(i0, 0);
685 }
686 #pragma omp for nowait
687 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
688 coords[1][i1]=getLocalCoordinate(i1, 1);
689 }
690 }
691 int* dims = const_cast<int*>(getNumNodesPerDim());
692
693 // write mesh
694 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
695 DB_COLLINEAR, NULL);
696
697 // write node ids
698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
699 NULL, 0, DB_INT, DB_NODECENT, NULL);
700
701 // write element ids
702 dims = const_cast<int*>(getNumElementsPerDim());
703 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
704 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
705
706 // rank 0 writes multimesh and multivar
707 if (m_mpiInfo->rank == 0) {
708 vector<string> tempstrings;
709 vector<char*> names;
710 for (dim_t i=0; i<m_mpiInfo->size; i++) {
711 stringstream path;
712 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
713 tempstrings.push_back(path.str());
714 names.push_back((char*)tempstrings.back().c_str());
715 }
716 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
717 DBSetDir(dbfile, "/");
718 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
719 &types[0], NULL);
720 tempstrings.clear();
721 names.clear();
722 for (dim_t i=0; i<m_mpiInfo->size; i++) {
723 stringstream path;
724 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
725 tempstrings.push_back(path.str());
726 names.push_back((char*)tempstrings.back().c_str());
727 }
728 types.assign(m_mpiInfo->size, DB_QUADVAR);
729 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
730 &types[0], NULL);
731 tempstrings.clear();
732 names.clear();
733 for (dim_t i=0; i<m_mpiInfo->size; i++) {
734 stringstream path;
735 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
736 tempstrings.push_back(path.str());
737 names.push_back((char*)tempstrings.back().c_str());
738 }
739 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
740 &types[0], NULL);
741 }
742
743 if (m_mpiInfo->size > 1) {
744 #ifdef ESYS_MPI
745 PMPIO_HandOffBaton(baton, dbfile);
746 PMPIO_Finish(baton);
747 #endif
748 } else {
749 DBClose(dbfile);
750 }
751
752 #else // USE_SILO
753 throw RipleyException("dump: no Silo support");
754 #endif
755 }
756
757 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
758 {
759 switch (fsType) {
760 case Nodes:
761 case ReducedNodes: // FIXME: reduced
762 return &m_nodeId[0];
763 case DegreesOfFreedom:
764 case ReducedDegreesOfFreedom: // FIXME: reduced
765 return &m_dofId[0];
766 case Elements:
767 case ReducedElements:
768 return &m_elementId[0];
769 case FaceElements:
770 case ReducedFaceElements:
771 return &m_faceId[0];
772 case Points:
773 return &m_diracPointNodeIDs[0];
774 default:
775 break;
776 }
777
778 stringstream msg;
779 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
780 throw RipleyException(msg.str());
781 }
782
783 bool Rectangle::ownSample(int fsType, index_t id) const
784 {
785 if (getMPISize()==1)
786 return true;
787
788 switch (fsType) {
789 case Nodes:
790 case ReducedNodes: // FIXME: reduced
791 return (m_dofMap[id] < getNumDOF());
792 case DegreesOfFreedom:
793 case ReducedDegreesOfFreedom:
794 return true;
795 case Elements:
796 case ReducedElements:
797 // check ownership of element's bottom left node
798 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
799 case FaceElements:
800 case ReducedFaceElements:
801 {
802 // determine which face the sample belongs to before
803 // checking ownership of corresponding element's first node
804 dim_t n=0;
805 for (size_t i=0; i<4; i++) {
806 n+=m_faceCount[i];
807 if (id<n) {
808 index_t k;
809 if (i==1)
810 k=m_NN[0]-2;
811 else if (i==3)
812 k=m_NN[0]*(m_NN[1]-2);
813 else
814 k=0;
815 // determine whether to move right or up
816 const index_t delta=(i/2==0 ? m_NN[0] : 1);
817 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
818 }
819 }
820 return false;
821 }
822 default:
823 break;
824 }
825
826 stringstream msg;
827 msg << "ownSample: invalid function space type " << fsType;
828 throw RipleyException(msg.str());
829 }
830
831 void Rectangle::setToNormal(escript::Data& out) const
832 {
833 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
834 out.requireWrite();
835 #pragma omp parallel
836 {
837 if (m_faceOffset[0] > -1) {
838 #pragma omp for nowait
839 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
840 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
841 // set vector at two quadrature points
842 *o++ = -1.;
843 *o++ = 0.;
844 *o++ = -1.;
845 *o = 0.;
846 }
847 }
848
849 if (m_faceOffset[1] > -1) {
850 #pragma omp for nowait
851 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
852 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
853 // set vector at two quadrature points
854 *o++ = 1.;
855 *o++ = 0.;
856 *o++ = 1.;
857 *o = 0.;
858 }
859 }
860
861 if (m_faceOffset[2] > -1) {
862 #pragma omp for nowait
863 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
864 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
865 // set vector at two quadrature points
866 *o++ = 0.;
867 *o++ = -1.;
868 *o++ = 0.;
869 *o = -1.;
870 }
871 }
872
873 if (m_faceOffset[3] > -1) {
874 #pragma omp for nowait
875 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
876 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
877 // set vector at two quadrature points
878 *o++ = 0.;
879 *o++ = 1.;
880 *o++ = 0.;
881 *o = 1.;
882 }
883 }
884 } // end of parallel section
885 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
886 out.requireWrite();
887 #pragma omp parallel
888 {
889 if (m_faceOffset[0] > -1) {
890 #pragma omp for nowait
891 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
892 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
893 *o++ = -1.;
894 *o = 0.;
895 }
896 }
897
898 if (m_faceOffset[1] > -1) {
899 #pragma omp for nowait
900 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
901 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
902 *o++ = 1.;
903 *o = 0.;
904 }
905 }
906
907 if (m_faceOffset[2] > -1) {
908 #pragma omp for nowait
909 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
910 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
911 *o++ = 0.;
912 *o = -1.;
913 }
914 }
915
916 if (m_faceOffset[3] > -1) {
917 #pragma omp for nowait
918 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
919 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
920 *o++ = 0.;
921 *o = 1.;
922 }
923 }
924 } // end of parallel section
925
926 } else {
927 stringstream msg;
928 msg << "setToNormal: invalid function space type "
929 << out.getFunctionSpace().getTypeCode();
930 throw RipleyException(msg.str());
931 }
932 }
933
934 void Rectangle::setToSize(escript::Data& out) const
935 {
936 if (out.getFunctionSpace().getTypeCode() == Elements
937 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
938 out.requireWrite();
939 const dim_t numQuad=out.getNumDataPointsPerSample();
940 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
941 #pragma omp parallel for
942 for (index_t k = 0; k < getNumElements(); ++k) {
943 double* o = out.getSampleDataRW(k);
944 fill(o, o+numQuad, size);
945 }
946 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
947 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
948 out.requireWrite();
949 const dim_t numQuad=out.getNumDataPointsPerSample();
950 #pragma omp parallel
951 {
952 if (m_faceOffset[0] > -1) {
953 #pragma omp for nowait
954 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
955 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
956 fill(o, o+numQuad, m_dx[1]);
957 }
958 }
959
960 if (m_faceOffset[1] > -1) {
961 #pragma omp for nowait
962 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
963 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
964 fill(o, o+numQuad, m_dx[1]);
965 }
966 }
967
968 if (m_faceOffset[2] > -1) {
969 #pragma omp for nowait
970 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
971 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
972 fill(o, o+numQuad, m_dx[0]);
973 }
974 }
975
976 if (m_faceOffset[3] > -1) {
977 #pragma omp for nowait
978 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
979 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
980 fill(o, o+numQuad, m_dx[0]);
981 }
982 }
983 } // end of parallel section
984
985 } else {
986 stringstream msg;
987 msg << "setToSize: invalid function space type "
988 << out.getFunctionSpace().getTypeCode();
989 throw RipleyException(msg.str());
990 }
991 }
992
993 void Rectangle::Print_Mesh_Info(const bool full) const
994 {
995 RipleyDomain::Print_Mesh_Info(full);
996 if (full) {
997 cout << " Id Coordinates" << endl;
998 cout.precision(15);
999 cout.setf(ios::scientific, ios::floatfield);
1000 for (index_t i=0; i < getNumNodes(); i++) {
1001 cout << " " << setw(5) << m_nodeId[i]
1002 << " " << getLocalCoordinate(i%m_NN[0], 0)
1003 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
1004 }
1005 }
1006 }
1007
1008
1009 //protected
1010 void Rectangle::assembleCoordinates(escript::Data& arg) const
1011 {
1012 escriptDataC x = arg.getDataC();
1013 int numDim = m_numDim;
1014 if (!isDataPointShapeEqual(&x, 1, &numDim))
1015 throw RipleyException("setToX: Invalid Data object shape");
1016 if (!numSamplesEqual(&x, 1, getNumNodes()))
1017 throw RipleyException("setToX: Illegal number of samples in Data object");
1018
1019 arg.requireWrite();
1020 #pragma omp parallel for
1021 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1022 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1023 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
1024 point[0] = getLocalCoordinate(i0, 0);
1025 point[1] = getLocalCoordinate(i1, 1);
1026 }
1027 }
1028 }
1029
1030 //protected
1031 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
1032 {
1033 const dim_t numComp = in.getDataPointSize();
1034 const double cx0 = .21132486540518711775/m_dx[0];
1035 const double cx1 = .78867513459481288225/m_dx[0];
1036 const double cx2 = 1./m_dx[0];
1037 const double cy0 = .21132486540518711775/m_dx[1];
1038 const double cy1 = .78867513459481288225/m_dx[1];
1039 const double cy2 = 1./m_dx[1];
1040
1041 if (out.getFunctionSpace().getTypeCode() == Elements) {
1042 out.requireWrite();
1043 #pragma omp parallel
1044 {
1045 vector<double> f_00(numComp);
1046 vector<double> f_01(numComp);
1047 vector<double> f_10(numComp);
1048 vector<double> f_11(numComp);
1049 #pragma omp for
1050 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1051 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1052 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1053 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1054 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1055 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1056 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1057 for (index_t i=0; i < numComp; ++i) {
1058 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1059 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1060 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1061 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1062 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1063 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1064 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1065 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1066 } // end of component loop i
1067 } // end of k0 loop
1068 } // end of k1 loop
1069 } // end of parallel section
1070 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1071 out.requireWrite();
1072 #pragma omp parallel
1073 {
1074 vector<double> f_00(numComp);
1075 vector<double> f_01(numComp);
1076 vector<double> f_10(numComp);
1077 vector<double> f_11(numComp);
1078 #pragma omp for
1079 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1080 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1081 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1082 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1083 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1084 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1085 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1086 for (index_t i=0; i < numComp; ++i) {
1087 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1088 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1089 } // end of component loop i
1090 } // end of k0 loop
1091 } // end of k1 loop
1092 } // end of parallel section
1093 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1094 out.requireWrite();
1095 #pragma omp parallel
1096 {
1097 vector<double> f_00(numComp);
1098 vector<double> f_01(numComp);
1099 vector<double> f_10(numComp);
1100 vector<double> f_11(numComp);
1101 if (m_faceOffset[0] > -1) {
1102 #pragma omp for nowait
1103 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1104 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1105 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1106 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1107 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1108 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1109 for (index_t i=0; i < numComp; ++i) {
1110 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1111 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1112 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1113 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1114 } // end of component loop i
1115 } // end of k1 loop
1116 } // end of face 0
1117 if (m_faceOffset[1] > -1) {
1118 #pragma omp for nowait
1119 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1120 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1121 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1122 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1123 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1124 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1125 for (index_t i=0; i < numComp; ++i) {
1126 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1127 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1128 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1129 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1130 } // end of component loop i
1131 } // end of k1 loop
1132 } // end of face 1
1133 if (m_faceOffset[2] > -1) {
1134 #pragma omp for nowait
1135 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1136 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1137 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1138 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1139 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1140 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1141 for (index_t i=0; i < numComp; ++i) {
1142 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1143 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1144 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1145 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1146 } // end of component loop i
1147 } // end of k0 loop
1148 } // end of face 2
1149 if (m_faceOffset[3] > -1) {
1150 #pragma omp for nowait
1151 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1152 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1153 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1154 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1155 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1156 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1157 for (index_t i=0; i < numComp; ++i) {
1158 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1159 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1160 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1161 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1162 } // end of component loop i
1163 } // end of k0 loop
1164 } // end of face 3
1165 } // end of parallel section
1166
1167 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1168 out.requireWrite();
1169 #pragma omp parallel
1170 {
1171 vector<double> f_00(numComp);
1172 vector<double> f_01(numComp);
1173 vector<double> f_10(numComp);
1174 vector<double> f_11(numComp);
1175 if (m_faceOffset[0] > -1) {
1176 #pragma omp for nowait
1177 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1178 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1179 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1180 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1181 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1182 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1183 for (index_t i=0; i < numComp; ++i) {
1184 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1185 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1186 } // end of component loop i
1187 } // end of k1 loop
1188 } // end of face 0
1189 if (m_faceOffset[1] > -1) {
1190 #pragma omp for nowait
1191 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1192 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1193 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1194 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1195 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1196 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1197 for (index_t i=0; i < numComp; ++i) {
1198 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1199 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1200 } // end of component loop i
1201 } // end of k1 loop
1202 } // end of face 1
1203 if (m_faceOffset[2] > -1) {
1204 #pragma omp for nowait
1205 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1206 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1207 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1208 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1209 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1210 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1211 for (index_t i=0; i < numComp; ++i) {
1212 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1213 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1214 } // end of component loop i
1215 } // end of k0 loop
1216 } // end of face 2
1217 if (m_faceOffset[3] > -1) {
1218 #pragma omp for nowait
1219 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1220 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1221 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1222 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1223 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1224 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1225 for (index_t i=0; i < numComp; ++i) {
1226 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1227 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1228 } // end of component loop i
1229 } // end of k0 loop
1230 } // end of face 3
1231 } // end of parallel section
1232 }
1233 }
1234
1235 //protected
1236 void Rectangle::assembleIntegrate(vector<double>& integrals,
1237 const escript::Data& arg) const
1238 {
1239 const dim_t numComp = arg.getDataPointSize();
1240 const index_t left = (m_offset[0]==0 ? 0 : 1);
1241 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1242 const int fs=arg.getFunctionSpace().getTypeCode();
1243 if (fs == Elements && arg.actsExpanded()) {
1244 #pragma omp parallel
1245 {
1246 vector<double> int_local(numComp, 0);
1247 const double w = m_dx[0]*m_dx[1]/4.;
1248 #pragma omp for nowait
1249 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1250 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1251 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1252 for (index_t i=0; i < numComp; ++i) {
1253 const double f0 = f[INDEX2(i,0,numComp)];
1254 const double f1 = f[INDEX2(i,1,numComp)];
1255 const double f2 = f[INDEX2(i,2,numComp)];
1256 const double f3 = f[INDEX2(i,3,numComp)];
1257 int_local[i]+=(f0+f1+f2+f3)*w;
1258 } // end of component loop i
1259 } // end of k0 loop
1260 } // end of k1 loop
1261 #pragma omp critical
1262 for (index_t i=0; i<numComp; i++)
1263 integrals[i]+=int_local[i];
1264 } // end of parallel section
1265
1266 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1267 const double w = m_dx[0]*m_dx[1];
1268 #pragma omp parallel
1269 {
1270 vector<double> int_local(numComp, 0);
1271 #pragma omp for nowait
1272 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1273 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1274 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1275 for (index_t i=0; i < numComp; ++i) {
1276 int_local[i]+=f[i]*w;
1277 }
1278 }
1279 }
1280 #pragma omp critical
1281 for (index_t i=0; i<numComp; i++)
1282 integrals[i]+=int_local[i];
1283 } // end of parallel section
1284
1285 } else if (fs == FaceElements && arg.actsExpanded()) {
1286 #pragma omp parallel
1287 {
1288 vector<double> int_local(numComp, 0);
1289 const double w0 = m_dx[0]/2.;
1290 const double w1 = m_dx[1]/2.;
1291 if (m_faceOffset[0] > -1) {
1292 #pragma omp for nowait
1293 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1294 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1295 for (index_t i=0; i < numComp; ++i) {
1296 const double f0 = f[INDEX2(i,0,numComp)];
1297 const double f1 = f[INDEX2(i,1,numComp)];
1298 int_local[i]+=(f0+f1)*w1;
1299 } // end of component loop i
1300 } // end of k1 loop
1301 }
1302
1303 if (m_faceOffset[1] > -1) {
1304 #pragma omp for nowait
1305 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1306 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1307 for (index_t i=0; i < numComp; ++i) {
1308 const double f0 = f[INDEX2(i,0,numComp)];
1309 const double f1 = f[INDEX2(i,1,numComp)];
1310 int_local[i]+=(f0+f1)*w1;
1311 } // end of component loop i
1312 } // end of k1 loop
1313 }
1314
1315 if (m_faceOffset[2] > -1) {
1316 #pragma omp for nowait
1317 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1318 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1319 for (index_t i=0; i < numComp; ++i) {
1320 const double f0 = f[INDEX2(i,0,numComp)];
1321 const double f1 = f[INDEX2(i,1,numComp)];
1322 int_local[i]+=(f0+f1)*w0;
1323 } // end of component loop i
1324 } // end of k0 loop
1325 }
1326
1327 if (m_faceOffset[3] > -1) {
1328 #pragma omp for nowait
1329 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1330 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1331 for (index_t i=0; i < numComp; ++i) {
1332 const double f0 = f[INDEX2(i,0,numComp)];
1333 const double f1 = f[INDEX2(i,1,numComp)];
1334 int_local[i]+=(f0+f1)*w0;
1335 } // end of component loop i
1336 } // end of k0 loop
1337 }
1338 #pragma omp critical
1339 for (index_t i=0; i<numComp; i++)
1340 integrals[i]+=int_local[i];
1341 } // end of parallel section
1342
1343 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1344 #pragma omp parallel
1345 {
1346 vector<double> int_local(numComp, 0);
1347 if (m_faceOffset[0] > -1) {
1348 #pragma omp for nowait
1349 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1350 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1351 for (index_t i=0; i < numComp; ++i) {
1352 int_local[i]+=f[i]*m_dx[1];
1353 }
1354 }
1355 }
1356
1357 if (m_faceOffset[1] > -1) {
1358 #pragma omp for nowait
1359 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1360 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1361 for (index_t i=0; i < numComp; ++i) {
1362 int_local[i]+=f[i]*m_dx[1];
1363 }
1364 }
1365 }
1366
1367 if (m_faceOffset[2] > -1) {
1368 #pragma omp for nowait
1369 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1370 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1371 for (index_t i=0; i < numComp; ++i) {
1372 int_local[i]+=f[i]*m_dx[0];
1373 }
1374 }
1375 }
1376
1377 if (m_faceOffset[3] > -1) {
1378 #pragma omp for nowait
1379 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1380 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1381 for (index_t i=0; i < numComp; ++i) {
1382 int_local[i]+=f[i]*m_dx[0];
1383 }
1384 }
1385 }
1386
1387 #pragma omp critical
1388 for (index_t i=0; i<numComp; i++)
1389 integrals[i]+=int_local[i];
1390 } // end of parallel section
1391 } // function space selector
1392 }
1393
1394 //protected
1395 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1396 {
1397 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1398 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1399 const int x=node%nDOF0;
1400 const int y=node/nDOF0;
1401 dim_t num=0;
1402 // loop through potential neighbours and add to index if positions are
1403 // within bounds
1404 for (int i1=-1; i1<2; i1++) {
1405 for (int i0=-1; i0<2; i0++) {
1406 // skip node itself
1407 if (i0==0 && i1==0)
1408 continue;
1409 // location of neighbour node
1410 const int nx=x+i0;
1411 const int ny=y+i1;
1412 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1413 index.push_back(ny*nDOF0+nx);
1414 num++;
1415 }
1416 }
1417 }
1418
1419 return num;
1420 }
1421
1422 //protected
1423 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1424 {
1425 const dim_t numComp = in.getDataPointSize();
1426 out.requireWrite();
1427
1428 const index_t left = (m_offset[0]==0 ? 0 : 1);
1429 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1430 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1431 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1432 #pragma omp parallel for
1433 for (index_t i=0; i<nDOF1; i++) {
1434 for (index_t j=0; j<nDOF0; j++) {
1435 const index_t n=j+left+(i+bottom)*m_NN[0];
1436 const double* src=in.getSampleDataRO(n);
1437 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1438 }
1439 }
1440 }
1441
1442 //protected
1443 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1444 {
1445 const dim_t numComp = in.getDataPointSize();
1446 paso::Coupler_ptr coupler(new paso::Coupler(m_connector, numComp));
1447 // expand data object if necessary to be able to grab the whole data
1448 const_cast<escript::Data*>(&in)->expand();
1449 coupler->startCollect(in.getSampleDataRO(0));
1450
1451 const dim_t numDOF = getNumDOF();
1452 out.requireWrite();
1453 const double* buffer = coupler->finishCollect();
1454
1455 #pragma omp parallel for
1456 for (index_t i=0; i<getNumNodes(); i++) {
1457 const double* src=(m_dofMap[i]<numDOF ?
1458 in.getSampleDataRO(m_dofMap[i])
1459 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1460 copy(src, src+numComp, out.getSampleDataRW(i));
1461 }
1462 }
1463
1464 //private
1465 void Rectangle::populateSampleIds()
1466 {
1467 // degrees of freedom are numbered from left to right, bottom to top in
1468 // each rank, continuing on the next rank (ranks also go left-right,
1469 // bottom-top).
1470 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1471 // helps when writing out data rank after rank.
1472
1473 // build node distribution vector first.
1474 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1475 // constant for all ranks in this implementation
1476 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1477 const dim_t numDOF=getNumDOF();
1478 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1479 m_nodeDistribution[k]=k*numDOF;
1480 }
1481 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1482 m_nodeId.resize(getNumNodes());
1483 m_dofId.resize(numDOF);
1484 m_elementId.resize(getNumElements());
1485
1486 // populate face element counts
1487 //left
1488 if (m_offset[0]==0)
1489 m_faceCount[0]=m_NE[1];
1490 else
1491 m_faceCount[0]=0;
1492 //right
1493 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1494 m_faceCount[1]=m_NE[1];
1495 else
1496 m_faceCount[1]=0;
1497 //bottom
1498 if (m_offset[1]==0)
1499 m_faceCount[2]=m_NE[0];
1500 else
1501 m_faceCount[2]=0;
1502 //top
1503 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1504 m_faceCount[3]=m_NE[0];
1505 else
1506 m_faceCount[3]=0;
1507
1508 m_faceId.resize(getNumFaceElements());
1509
1510 const index_t left = (m_offset[0]==0 ? 0 : 1);
1511 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1512 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1513 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1514
1515 #define globalNodeId(x,y) \
1516 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1517 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1518
1519 // set corner id's outside the parallel region
1520 m_nodeId[0] = globalNodeId(0, 0);
1521 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1522 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1523 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1524 #undef globalNodeId
1525
1526 #pragma omp parallel
1527 {
1528 // populate degrees of freedom and own nodes (identical id)
1529 #pragma omp for nowait
1530 for (dim_t i=0; i<nDOF1; i++) {
1531 for (dim_t j=0; j<nDOF0; j++) {
1532 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1533 const index_t dofIdx=j+i*nDOF0;
1534 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1535 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1536 }
1537 }
1538
1539 // populate the rest of the nodes (shared with other ranks)
1540 if (m_faceCount[0]==0) { // left column
1541 #pragma omp for nowait
1542 for (dim_t i=0; i<nDOF1; i++) {
1543 const index_t nodeIdx=(i+bottom)*m_NN[0];
1544 const index_t dofId=(i+1)*nDOF0-1;
1545 m_nodeId[nodeIdx]
1546 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1547 }
1548 }
1549 if (m_faceCount[1]==0) { // right column
1550 #pragma omp for nowait
1551 for (dim_t i=0; i<nDOF1; i++) {
1552 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1553 const index_t dofId=i*nDOF0;
1554 m_nodeId[nodeIdx]
1555 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1556 }
1557 }
1558 if (m_faceCount[2]==0) { // bottom row
1559 #pragma omp for nowait
1560 for (dim_t i=0; i<nDOF0; i++) {
1561 const index_t nodeIdx=i+left;
1562 const index_t dofId=nDOF0*(nDOF1-1)+i;
1563 m_nodeId[nodeIdx]
1564 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1565 }
1566 }
1567 if (m_faceCount[3]==0) { // top row
1568 #pragma omp for nowait
1569 for (dim_t i=0; i<nDOF0; i++) {
1570 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1571 const index_t dofId=i;
1572 m_nodeId[nodeIdx]
1573 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1574 }
1575 }
1576
1577 // populate element id's
1578 #pragma omp for nowait
1579 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1580 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1581 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1582 }
1583 }
1584
1585 // face elements
1586 #pragma omp for
1587 for (dim_t k=0; k<getNumFaceElements(); k++)
1588 m_faceId[k]=k;
1589 } // end parallel section
1590
1591 m_nodeTags.assign(getNumNodes(), 0);
1592 updateTagsInUse(Nodes);
1593
1594 m_elementTags.assign(getNumElements(), 0);
1595 updateTagsInUse(Elements);
1596
1597 // generate face offset vector and set face tags
1598 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1599 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1600 m_faceOffset.assign(4, -1);
1601 m_faceTags.clear();
1602 index_t offset=0;
1603 for (size_t i=0; i<4; i++) {
1604 if (m_faceCount[i]>0) {
1605 m_faceOffset[i]=offset;
1606 offset+=m_faceCount[i];
1607 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1608 }
1609 }
1610 setTagMap("left", LEFT);
1611 setTagMap("right", RIGHT);
1612 setTagMap("bottom", BOTTOM);
1613 setTagMap("top", TOP);
1614 updateTagsInUse(FaceElements);
1615 }
1616
1617 //private
1618 void Rectangle::createPattern()
1619 {
1620 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1621 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1622 const index_t left = (m_offset[0]==0 ? 0 : 1);
1623 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1624
1625 // populate node->DOF mapping with own degrees of freedom.
1626 // The rest is assigned in the loop further down
1627 m_dofMap.assign(getNumNodes(), 0);
1628 #pragma omp parallel for
1629 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1630 for (index_t j=left; j<left+nDOF0; j++) {
1631 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1632 }
1633 }
1634
1635 // build list of shared components and neighbours by looping through
1636 // all potential neighbouring ranks and checking if positions are
1637 // within bounds
1638 const dim_t numDOF=nDOF0*nDOF1;
1639 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1640 RankVector neighbour;
1641 IndexVector offsetInShared(1,0);
1642 IndexVector sendShared, recvShared;
1643 int numShared=0, expectedShared=0;
1644 const int x=m_mpiInfo->rank%m_NX[0];
1645 const int y=m_mpiInfo->rank/m_NX[0];
1646 if (x > 0)
1647 expectedShared += nDOF1;
1648 if (x < m_NX[0] - 1)
1649 expectedShared += nDOF1;
1650 if (y > 0)
1651 expectedShared += nDOF0;
1652 if (y < m_NX[1] - 1)
1653 expectedShared += nDOF0;
1654 if (x > 0 && y > 0) expectedShared++;
1655 if (x > 0 && y < m_NX[1] - 1) expectedShared++;
1656 if (x < m_NX[0] - 1 && y > 0) expectedShared++;
1657 if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++;
1658
1659 vector<IndexVector> rowIndices(expectedShared);
1660
1661 for (int i1=-1; i1<2; i1++) {
1662 for (int i0=-1; i0<2; i0++) {
1663 // skip this rank
1664 if (i0==0 && i1==0)
1665 continue;
1666 // location of neighbour rank
1667 const int nx=x+i0;
1668 const int ny=y+i1;
1669 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1670 neighbour.push_back(ny*m_NX[0]+nx);
1671 if (i0==0) {
1672 // sharing top or bottom edge
1673 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1674 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1675 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1676 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1677 sendShared.push_back(firstDOF+i);
1678 recvShared.push_back(numDOF+numShared);
1679 if (i>0)
1680 doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared);
1681 doublyLink(colIndices, rowIndices, firstDOF+i, numShared);
1682 if (i<nDOF0-1)
1683 doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared);
1684 m_dofMap[firstNode+i]=numDOF+numShared;
1685 }
1686 } else if (i1==0) {
1687 // sharing left or right edge
1688 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1689 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1690 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1691 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1692 sendShared.push_back(firstDOF+i*nDOF0);
1693 recvShared.push_back(numDOF+numShared);
1694 if (i>0)
1695 doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared);
1696 doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared);
1697 if (i<nDOF1-1)
1698 doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared);
1699 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1700 }
1701 } else {
1702 // sharing a node
1703 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1704 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1705 offsetInShared.push_back(offsetInShared.back()+1);
1706 sendShared.push_back(dof);
1707 recvShared.push_back(numDOF+numShared);
1708 doublyLink(colIndices, rowIndices, dof, numShared);
1709 m_dofMap[node]=numDOF+numShared;
1710 ++numShared;
1711 }
1712 }
1713 }
1714 }
1715
1716 #pragma omp parallel for
1717 for (int i = 0; i < numShared; i++) {
1718 std::sort(rowIndices[i].begin(), rowIndices[i].end());
1719 }
1720
1721 // create connector
1722 paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
1723 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1724 &offsetInShared[0], 1, 0, m_mpiInfo));
1725 paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
1726 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1727 &offsetInShared[0], 1, 0, m_mpiInfo));
1728 m_connector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
1729
1730 // create main and couple blocks
1731 paso::Pattern_ptr mainPattern = createMainPattern();
1732 paso::Pattern_ptr colPattern, rowPattern;
1733 createCouplePatterns(colIndices, rowIndices, numShared, colPattern, rowPattern);
1734
1735 // allocate paso distribution
1736 paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,
1737 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0));
1738
1739 // finally create the system matrix
1740 m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
1741 distribution, distribution, mainPattern, colPattern, rowPattern,
1742 m_connector, m_connector));
1743
1744 // useful debug output
1745 /*
1746 cout << "--- rcv_shcomp ---" << endl;
1747 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1748 for (size_t i=0; i<neighbour.size(); i++) {
1749 cout << "neighbor[" << i << "]=" << neighbour[i]
1750 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1751 }
1752 for (size_t i=0; i<recvShared.size(); i++) {
1753 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1754 }
1755 cout << "--- snd_shcomp ---" << endl;
1756 for (size_t i=0; i<sendShared.size(); i++) {
1757 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1758 }
1759 cout << "--- dofMap ---" << endl;
1760 for (size_t i=0; i<m_dofMap.size(); i++) {
1761 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1762 }
1763 cout << "--- colIndices ---" << endl;
1764 for (size_t i=0; i<colIndices.size(); i++) {
1765 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1766 }
1767 */
1768
1769 /*
1770 cout << "--- main_pattern ---" << endl;
1771 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1772 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1773 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1774 }
1775 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1776 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1777 }
1778 */
1779
1780 /*
1781 cout << "--- colCouple_pattern ---" << endl;
1782 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1783 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1784 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1785 }
1786 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1787 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1788 }
1789 */
1790
1791 /*
1792 cout << "--- rowCouple_pattern ---" << endl;
1793 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1794 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1795 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1796 }
1797 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1798 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1799 }
1800 */
1801 }
1802
1803 //private
1804 void Rectangle::addToMatrixAndRHS(paso::SystemMatrix_ptr S, escript::Data& F,
1805 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1806 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1807 {
1808 IndexVector rowIndex;
1809 rowIndex.push_back(m_dofMap[firstNode]);
1810 rowIndex.push_back(m_dofMap[firstNode+1]);
1811 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1812 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1813 if (addF) {
1814 double *F_p=F.getSampleDataRW(0);
1815 for (index_t i=0; i<rowIndex.size(); i++) {
1816 if (rowIndex[i]<getNumDOF()) {
1817 for (index_t eq=0; eq<nEq; eq++) {
1818 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1819 }
1820 }
1821 }
1822 }
1823 if (addS) {
1824 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1825 }
1826 }
1827
1828 //protected
1829 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1830 const escript::Data& in,
1831 bool reduced) const
1832 {
1833 const dim_t numComp = in.getDataPointSize();
1834 if (reduced) {
1835 out.requireWrite();
1836 const double c0 = 0.25;
1837 #pragma omp parallel
1838 {
1839 vector<double> f_00(numComp);
1840 vector<double> f_01(numComp);
1841 vector<double> f_10(numComp);
1842 vector<double> f_11(numComp);
1843 #pragma omp for
1844 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1845 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1846 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1847 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1848 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1849 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1850 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1851 for (index_t i=0; i < numComp; ++i) {
1852 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1853 } /* end of component loop i */
1854 } /* end of k0 loop */
1855 } /* end of k1 loop */
1856 } /* end of parallel section */
1857 } else {
1858 out.requireWrite();
1859 const double c0 = 0.16666666666666666667;
1860 const double c1 = 0.044658198738520451079;
1861 const double c2 = 0.62200846792814621559;
1862 #pragma omp parallel
1863 {
1864 vector<double> f_00(numComp);
1865 vector<double> f_01(numComp);
1866 vector<double> f_10(numComp);
1867 vector<double> f_11(numComp);
1868 #pragma omp for
1869 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1870 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1871 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1872 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1873 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1874 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1875 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1876 for (index_t i=0; i < numComp; ++i) {
1877 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1878 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1879 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1880 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1881 } /* end of component loop i */
1882 } /* end of k0 loop */
1883 } /* end of k1 loop */
1884 } /* end of parallel section */
1885 }
1886 }
1887
1888 //protected
1889 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1890 const escript::Data& in,
1891 bool reduced) const
1892 {
1893 const dim_t numComp = in.getDataPointSize();
1894 if (reduced) {
1895 out.requireWrite();
1896 #pragma omp parallel
1897 {
1898 vector<double> f_00(numComp);
1899 vector<double> f_01(numComp);
1900 vector<double> f_10(numComp);
1901 vector<double> f_11(numComp);
1902 if (m_faceOffset[0] > -1) {
1903 #pragma omp for nowait
1904 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1905 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1906 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1907 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1908 for (index_t i=0; i < numComp; ++i) {
1909 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1910 } /* end of component loop i */
1911 } /* end of k1 loop */
1912 } /* end of face 0 */
1913 if (m_faceOffset[1] > -1) {
1914 #pragma omp for nowait
1915 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1916 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1917 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1918 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1919 for (index_t i=0; i < numComp; ++i) {
1920 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1921 } /* end of component loop i */
1922 } /* end of k1 loop */
1923 } /* end of face 1 */
1924 if (m_faceOffset[2] > -1) {
1925 #pragma omp for nowait
1926 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1927 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1928 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1929 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1930 for (index_t i=0; i < numComp; ++i) {
1931 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1932 } /* end of component loop i */
1933 } /* end of k0 loop */
1934 } /* end of face 2 */
1935 if (m_faceOffset[3] > -1) {
1936 #pragma omp for nowait
1937 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1938 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1939 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1940 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1941 for (index_t i=0; i < numComp; ++i) {
1942 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1943 } /* end of component loop i */
1944 } /* end of k0 loop */
1945 } /* end of face 3 */
1946 } /* end of parallel section */
1947 } else {
1948 out.requireWrite();
1949 const double c0 = 0.21132486540518711775;
1950 const double c1 = 0.78867513459481288225;
1951 #pragma omp parallel
1952 {
1953 vector<double> f_00(numComp);
1954 vector<double> f_01(numComp);
1955 vector<double> f_10(numComp);
1956 vector<double> f_11(numComp);
1957 if (m_faceOffset[0] > -1) {
1958 #pragma omp for nowait
1959 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1960 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1961 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1962 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1963 for (index_t i=0; i < numComp; ++i) {
1964 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1965 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1966 } /* end of component loop i */
1967 } /* end of k1 loop */
1968 } /* end of face 0 */
1969 if (m_faceOffset[1] > -1) {
1970 #pragma omp for nowait
1971 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1972 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1973 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1974 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1975 for (index_t i=0; i < numComp; ++i) {
1976 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1977 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1978 } /* end of component loop i */
1979 } /* end of k1 loop */
1980 } /* end of face 1 */
1981 if (m_faceOffset[2] > -1) {
1982 #pragma omp for nowait
1983 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1984 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1985 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1986 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1987 for (index_t i=0; i < numComp; ++i) {
1988 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1989 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1990 } /* end of component loop i */
1991 } /* end of k0 loop */
1992 } /* end of face 2 */
1993 if (m_faceOffset[3] > -1) {
1994 #pragma omp for nowait
1995 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1996 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1997 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1998 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1999 for (index_t i=0; i < numComp; ++i) {
2000 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
2001 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
2002 } /* end of component loop i */
2003 } /* end of k0 loop */
2004 } /* end of face 3 */
2005 } /* end of parallel section */
2006 }
2007 }
2008
2009
2010
2011 namespace
2012 {
2013 // Calculates a gaussian blur convolution matrix for 2D
2014 // See wiki article on the subject
2015 double* get2DGauss(unsigned radius, double sigma)
2016 {
2017 double* arr=new double[(radius*2+1)*(radius*2+1)];
2018 double common=M_1_PI*0.5*1/(sigma*sigma);
2019 double total=0;
2020 int r=static_cast<int>(radius);
2021 for (int y=-r;y<=r;++y)
2022 {
2023 for (int x=-r;x<=r;++x)
2024 {
2025 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
2026 total+=arr[(x+r)+(y+r)*(r*2+1)];
2027 }
2028 }
2029 double invtotal=1/total;
2030 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
2031 {
2032 arr[p]*=invtotal;
2033 }
2034 return arr;
2035 }
2036
2037 // applies conv to source to get a point.
2038 // (xp, yp) are the coords in the source matrix not the destination matrix
2039 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
2040 {
2041 size_t bx=xp-radius, by=yp-radius;
2042 size_t sbase=bx+by*width;
2043 double result=0;
2044 for (int y=0;y<2*radius+1;++y)
2045 {
2046 for (int x=0;x<2*radius+1;++x)
2047 {
2048 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
2049 }
2050 }
2051 return result;
2052 }
2053 }
2054
2055
2056 /* This is a wrapper for filtered (and non-filtered) randoms
2057 * For detailed doco see randomFillWorker
2058 */
2059 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
2060 const escript::FunctionSpace& what,
2061 long seed, const boost::python::tuple& filter) const
2062 {
2063 int numvals=escript::DataTypes::noValues(shape);
2064 if (len(filter)>0 && (numvals!=1))
2065 {
2066 throw RipleyException("Ripley only supports filters for scalar data.");
2067 }
2068 escript::Data res=randomFillWorker(shape, seed, filter);
2069 if (res.getFunctionSpace()!=what)
2070 {
2071 escript::Data r=escript::Data(res, what);
2072 return r;
2073 }
2074 return res;
2075 }
2076
2077
2078 /* This routine produces a Data object filled with smoothed random data.
2079 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
2080 A parameter radius gives the size of the stencil used for the smoothing.
2081 A point on the left hand edge for example, will still require `radius` extra points to the left
2082 in order to complete the stencil.
2083
2084 All local calculation is done on an array called `src`, with
2085 dimensions = ext[0] * ext[1].
2086 Where ext[i]= internal[i]+2*radius.
2087
2088 Now for MPI there is overlap to deal with. We need to share both the overlapping
2089 values themselves but also the external region.
2090
2091 In a hypothetical 1-D case:
2092
2093
2094 1234567
2095 would be split into two ranks thus:
2096 123(4) (4)567 [4 being a shared element]
2097
2098 If the radius is 2. There will be padding elements on the outside:
2099
2100 pp123(4) (4)567pp
2101
2102 To ensure that 4 can be correctly computed on both ranks, values from the other rank
2103 need to be known.
2104
2105 pp123(4)56 23(4)567pp
2106
2107 Now in our case, we wout set all the values 23456 on the left rank and send them to the
2108 right hand rank.
2109
2110 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
2111 inset=2*radius+1
2112 This is to ensure that values at distance `radius` from the shared/overlapped element
2113 that ripley has.
2114
2115
2116 */
2117 escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape,
2118 long seed, const boost::python::tuple& filter) const
2119 {
2120 unsigned int radius=0; // these are only used by gaussian
2121 double sigma=0.5;
2122
2123 unsigned int numvals=escript::DataTypes::noValues(shape);
2124
2125
2126 if (len(filter)==0)
2127 {
2128 // nothing special required here yet
2129 }
2130 else if (len(filter)==3)
2131 {
2132 boost::python::extract<string> ex(filter[0]);
2133 if (!ex.check() || (ex()!="gaussian"))
2134 {
2135 throw RipleyException("Unsupported random filter");
2136 }
2137 boost::python::extract<unsigned int> ex1(filter[1]);
2138 if (!ex1.check())
2139 {
2140 throw RipleyException("Radius of gaussian filter must be a positive integer.");
2141 }
2142 radius=ex1();
2143 sigma=0.5;
2144 boost::python::extract<double> ex2(filter[2]);
2145 if (!ex2.check() || (sigma=ex2())<=0)
2146 {
2147 throw RipleyException("Sigma must be a postive floating point number.");
2148 }
2149 }
2150 else
2151 {
2152 throw RipleyException("Unsupported random filter for Rectangle.");
2153 }
2154
2155 // number of points in the internal region
2156 // that is, the ones we need smoothed versions of
2157 const dim_t internal[2] = { m_NN[0], m_NN[1] };
2158 size_t ext[2];
2159 ext[0]=(size_t)internal[0]+2*radius; // includes points we need as input
2160 ext[1]=(size_t)internal[1]+2*radius; // for smoothing
2161
2162 // now we check to see if the radius is acceptable
2163 // That is, would not cross multiple ranks in MPI
2164
2165 if (2*radius>=internal[0]-4)
2166 {
2167 throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
2168 }
2169 if (2*radius>=internal[1]-4)
2170 {
2171 throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
2172 }
2173
2174 double* src=new double[ext[0]*ext[1]*numvals];
2175 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2176
2177
2178 #ifdef ESYS_MPI
2179 if ((internal[0]<5) || (internal[1]<5))
2180 {
2181 // since the dimensions are equal for all ranks, this exception
2182 // will be thrown on all ranks
2183 throw RipleyException("Random Data in Ripley requires at least five elements per side per rank.");
2184 }
2185 dim_t X=m_mpiInfo->rank%m_NX[0];
2186 dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
2187 #endif
2188
2189 /*
2190 // if we wanted to test a repeating pattern
2191 size_t basex=0;
2192 size_t basey=0;
2193 #ifdef ESYS_MPI
2194 basex=X*m_gNE[0]/m_NX[0];
2195 basey=Y*m_gNE[1]/m_NX[1];
2196 #endif
2197
2198 esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2199 */
2200
2201
2202 #ifdef ESYS_MPI
2203
2204 BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2205 size_t inset=2*radius+2; // Its +2 not +1 because a whole element is shared (and hence
2206 // there is an overlap of two points both of which need to have "radius" points on either side.
2207
2208 size_t xmidlen=ext[0]-2*inset; // how wide is the x-dimension between the two insets
2209 size_t ymidlen=ext[1]-2*inset;
2210
2211 Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2212
2213 MPI_Request reqs[40]; // a non-tight upper bound on how many we need
2214 MPI_Status stats[40];
2215 short rused=0;
2216
2217 messvec incoms;
2218 messvec outcoms;
2219
2220 grid.generateInNeighbours(X, Y, incoms);
2221 grid.generateOutNeighbours(X, Y, outcoms);
2222
2223 block.copyAllToBuffer(src);
2224
2225 int comserr=0;
2226 for (size_t i=0;i<incoms.size();++i)
2227 {
2228 message& m=incoms[i];
2229 comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2230 block.setUsed(m.destbuffid);
2231 }
2232
2233 for (size_t i=0;i<outcoms.size();++i)
2234 {
2235 message& m=outcoms[i];
2236 comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2237 }
2238
2239 if (!comserr)
2240 {
2241 comserr=MPI_Waitall(rused, reqs, stats);
2242 }
2243
2244 if (comserr)
2245 {
2246 // Yes this is throwing an exception as a result of an MPI error.
2247 // and no we don't inform the other ranks that we are doing this.
2248 // however, we have no reason to believe coms work at this point anyway
2249 throw RipleyException("Error in coms for randomFill");
2250 }
2251
2252 block.copyUsedFromBuffer(src);
2253
2254 #endif
2255
2256 if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe
2257 {
2258
2259 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2260 escript::Data resdat(0, shape, fs , true);
2261 // don't need to check for exwrite because we just made it
2262 escript::DataVector& dv=resdat.getExpandedVectorReference();
2263
2264
2265 // now we need to copy values over
2266 for (size_t y=0;y<(internal[1]);++y)
2267 {
2268 for (size_t x=0;x<(internal[0]);++x)
2269 {
2270 for (unsigned int i=0;i<numvals;++i)
2271 {
2272 dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals];
2273 }
2274 }
2275 }
2276 delete[] src;
2277 return resdat;
2278 }
2279 else // filter enabled
2280 {
2281 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2282 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2283 // don't need to check for exwrite because we just made it
2284 escript::DataVector& dv=resdat.getExpandedVectorReference();
2285 double* convolution=get2DGauss(radius, sigma);
2286 for (size_t y=0;y<(internal[1]);++y)
2287 {
2288 for (size_t x=0;x<(internal[0]);++x)
2289 {
2290 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2291
2292 }
2293 }
2294 delete[] convolution;
2295 delete[] src;
2296 return resdat;
2297 }
2298 }
2299
2300 int Rectangle::findNode(const double *coords) const {
2301 const int NOT_MINE = -1;
2302 //is the found element even owned by this rank
2303 // (inside owned or shared elements but will map to an owned element)
2304 for (int dim = 0; dim < m_numDim; dim++) {
2305 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
2306 - m_dx[dim]/2.; //allows for point outside mapping onto node
2307 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
2308 + m_dx[dim]/2.;
2309 if (min > coords[dim] || max < coords[dim]) {
2310 return NOT_MINE;
2311 }
2312 }
2313 // get distance from origin
2314 double x = coords[0] - m_origin[0];
2315 double y = coords[1] - m_origin[1];
2316
2317 //check if the point is even inside the domain
2318 if (x < 0 || y < 0 || x > m_length[0] || y > m_length[1])
2319 return NOT_MINE;
2320
2321 // distance in elements
2322 int ex = (int) floor(x / m_dx[0] + 0.01*m_dx[0]);
2323 int ey = (int) floor(y / m_dx[1] + 0.01*m_dx[1]);
2324 // set the min distance high enough to be outside the element plus a bit
2325 int closest = NOT_MINE;
2326 double minDist = 1;
2327 for (int dim = 0; dim < m_numDim; dim++) {
2328 minDist += m_dx[dim]*m_dx[dim];
2329 }
2330 //find the closest node
2331 for (int dx = 0; dx < 1; dx++) {
2332 double xdist = (x - (ex + dx)*m_dx[0]);
2333 for (int dy = 0; dy < 1; dy++) {
2334 double ydist = (y - (ey + dy)*m_dx[1]);
2335 double total = xdist*xdist + ydist*ydist;
2336 if (total < minDist) {
2337 closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NN[0]);
2338 minDist = total;
2339 }
2340 }
2341 }
2342 //if this happens, we've let a dirac point slip through, which is awful
2343 if (closest == NOT_MINE) {
2344 throw RipleyException("Unable to map appropriate dirac point to a node,"
2345 " implementation problem in Rectangle::findNode()");
2346 }
2347 return closest;
2348 }
2349
2350 void Rectangle::setAssembler(std::string type, std::map<std::string,
2351 escript::Data> constants) {
2352 if (type.compare("WaveAssembler") == 0) {
2353 if (assembler_type != WAVE_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2354 throw RipleyException("Domain already using a different custom assembler");
2355 assembler_type = WAVE_ASSEMBLER;
2356 delete assembler;
2357 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2358 } else if (type.compare("LameAssembler") == 0) {
2359 if (assembler_type != LAME_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2360 throw RipleyException("Domain already using a different custom assembler");
2361 assembler_type = LAME_ASSEMBLER;
2362 delete assembler;
2363 assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
2364 } else { //else ifs would go before this for other types
2365 throw RipleyException("Ripley::Rectangle does not support the"
2366 " requested assembler");
2367 }
2368 }
2369
2370 } // end of namespace ripley
2371

  ViewVC Help
Powered by ViewVC 1.1.26