/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4769 - (show annotations)
Wed Mar 19 03:43:52 2014 UTC (5 years, 7 months ago) by sshaw
File size: 93932 byte(s)
added missing ifdefs for library presence
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <algorithm>
18
19 #include <ripley/Rectangle.h>
20 #include <paso/SystemMatrix.h>
21 #include <esysUtils/esysFileWriter.h>
22 #include <ripley/DefaultAssembler2D.h>
23 #include <ripley/WaveAssembler2D.h>
24 #include <ripley/LameAssembler2D.h>
25 #include <ripley/domainhelpers.h>
26 #include <boost/scoped_array.hpp>
27 #include "esysUtils/EsysRandom.h"
28 #include "blocktools.h"
29
30 #ifdef USE_NETCDF
31 #include <netcdfcpp.h>
32 #endif
33
34 #if USE_SILO
35 #include <silo.h>
36 #ifdef ESYS_MPI
37 #include <pmpio.h>
38 #endif
39 #endif
40
41 #include <iomanip>
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
49 double y1, int d0, int d1,
50 const std::vector<double>& points,
51 const std::vector<int>& tags,
52 const simap_t& tagnamestonums) :
53 RipleyDomain(2)
54 {
55 // ignore subdivision parameters for serial run
56 if (m_mpiInfo->size == 1) {
57 d0=1;
58 d1=1;
59 }
60
61 bool warn=false;
62 std::vector<int> factors;
63 int ranks = m_mpiInfo->size;
64 int epr[2] = {n0,n1};
65 int d[2] = {d0,d1};
66 if (d0<=0 || d1<=0) {
67 for (int i = 0; i < 2; i++) {
68 if (d[i] < 1) {
69 d[i] = 1;
70 continue;
71 }
72 epr[i] = -1; // can no longer be max
73 //remove
74 if (ranks % d[i] != 0) {
75 throw RipleyException("Invalid number of spatial subdivisions");
76 }
77 ranks /= d[i];
78 }
79 factorise(factors, ranks);
80 if (factors.size() != 0) {
81 warn = true;
82 }
83 }
84
85 while (factors.size() > 0) {
86 int i = epr[0] > epr[1] ? 0 : 1;
87 int f = factors.back();
88 factors.pop_back();
89 d[i] *= f;
90 epr[i] /= f;
91 }
92 d0 = d[0]; d1 = d[1];
93
94 // ensure number of subdivisions is valid and nodes can be distributed
95 // among number of ranks
96 if (d0*d1 != m_mpiInfo->size)
97 throw RipleyException("Invalid number of spatial subdivisions");
98
99 if (warn) {
100 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
101 << d1 << "). This may not be optimal!" << endl;
102 }
103
104 double l0 = x1-x0;
105 double l1 = y1-y0;
106 m_dx[0] = l0/n0;
107 m_dx[1] = l1/n1;
108
109 if ((n0+1)%d0 > 0) {
110 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
111 l0=m_dx[0]*n0;
112 cout << "Warning: Adjusted number of elements and length. N0="
113 << n0 << ", l0=" << l0 << endl;
114 }
115 if ((n1+1)%d1 > 0) {
116 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
117 l1=m_dx[1]*n1;
118 cout << "Warning: Adjusted number of elements and length. N1="
119 << n1 << ", l1=" << l1 << endl;
120 }
121
122 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
123 throw RipleyException("Too few elements for the number of ranks");
124
125 m_gNE[0] = n0;
126 m_gNE[1] = n1;
127 m_origin[0] = x0;
128 m_origin[1] = y0;
129 m_length[0] = l0;
130 m_length[1] = l1;
131 m_NX[0] = d0;
132 m_NX[1] = d1;
133
134 // local number of elements (with and without overlap)
135 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
136 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
137 m_NE[0]++;
138 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
139 m_ownNE[0]--;
140
141 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
142 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
143 m_NE[1]++;
144 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
145 m_ownNE[1]--;
146
147 // local number of nodes
148 m_NN[0] = m_NE[0]+1;
149 m_NN[1] = m_NE[1]+1;
150
151 // bottom-left node is at (offset0,offset1) in global mesh
152 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
153 if (m_offset[0] > 0)
154 m_offset[0]--;
155 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
156 if (m_offset[1] > 0)
157 m_offset[1]--;
158
159 populateSampleIds();
160 createPattern();
161 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
162 for (map<string, int>::const_iterator i = tagnamestonums.begin();
163 i != tagnamestonums.end(); i++) {
164 setTagMap(i->first, i->second);
165 }
166 addPoints(tags.size(), &points[0], &tags[0]);
167 }
168
169 Rectangle::~Rectangle()
170 {
171 Paso_SystemMatrixPattern_free(m_pattern);
172 Paso_Connector_free(m_connector);
173 delete assembler;
174 }
175
176 string Rectangle::getDescription() const
177 {
178 return "ripley::Rectangle";
179 }
180
181 bool Rectangle::operator==(const AbstractDomain& other) const
182 {
183 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
184 if (o) {
185 return (RipleyDomain::operator==(other) &&
186 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
187 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
188 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
189 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
190 }
191
192 return false;
193 }
194
195 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
196 const ReaderParameters& params) const
197 {
198 #ifdef USE_NETCDF
199 // check destination function space
200 int myN0, myN1;
201 if (out.getFunctionSpace().getTypeCode() == Nodes) {
202 myN0 = m_NN[0];
203 myN1 = m_NN[1];
204 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
205 out.getFunctionSpace().getTypeCode() == ReducedElements) {
206 myN0 = m_NE[0];
207 myN1 = m_NE[1];
208 } else
209 throw RipleyException("readNcGrid(): invalid function space for output data object");
210
211 if (params.first.size() != 2)
212 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
213
214 if (params.numValues.size() != 2)
215 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
216
217 if (params.multiplier.size() != 2)
218 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
219 for (size_t i=0; i<params.multiplier.size(); i++)
220 if (params.multiplier[i]<1)
221 throw RipleyException("readNcGrid(): all multipliers must be positive");
222 if (params.reverse.size() != 2)
223 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
224
225 // check file existence and size
226 NcFile f(filename.c_str(), NcFile::ReadOnly);
227 if (!f.is_valid())
228 throw RipleyException("readNcGrid(): cannot open file");
229
230 NcVar* var = f.get_var(varname.c_str());
231 if (!var)
232 throw RipleyException("readNcGrid(): invalid variable");
233
234 // TODO: rank>0 data support
235 const int numComp = out.getDataPointSize();
236 if (numComp > 1)
237 throw RipleyException("readNcGrid(): only scalar data supported");
238
239 const int dims = var->num_dims();
240 boost::scoped_array<long> edges(var->edges());
241
242 // is this a slice of the data object (dims!=2)?
243 // note the expected ordering of edges (as in numpy: y,x)
244 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
245 || (dims==1 && params.numValues[1]>1) ) {
246 throw RipleyException("readNcGrid(): not enough data in file");
247 }
248
249 // check if this rank contributes anything
250 if (params.first[0] >= m_offset[0]+myN0 ||
251 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
252 params.first[1] >= m_offset[1]+myN1 ||
253 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
254 return;
255
256 // now determine how much this rank has to write
257
258 // first coordinates in data object to write to
259 const int first0 = max(0, params.first[0]-m_offset[0]);
260 const int first1 = max(0, params.first[1]-m_offset[1]);
261 // indices to first value in file (not accounting for reverse yet)
262 int idx0 = max(0, m_offset[0]-params.first[0]);
263 int idx1 = max(0, m_offset[1]-params.first[1]);
264 // number of values to read
265 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
266 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
267
268 // make sure we read the right block if going backwards through file
269 if (params.reverse[0])
270 idx0 = edges[dims-1]-num0-idx0;
271 if (dims>1 && params.reverse[1])
272 idx1 = edges[dims-2]-num1-idx1;
273
274 vector<double> values(num0*num1);
275 if (dims==2) {
276 var->set_cur(idx1, idx0);
277 var->get(&values[0], num1, num0);
278 } else {
279 var->set_cur(idx0);
280 var->get(&values[0], num0);
281 }
282
283 const int dpp = out.getNumDataPointsPerSample();
284 out.requireWrite();
285
286 // helpers for reversing
287 const int x0 = (params.reverse[0] ? num0-1 : 0);
288 const int x_mult = (params.reverse[0] ? -1 : 1);
289 const int y0 = (params.reverse[1] ? num1-1 : 0);
290 const int y_mult = (params.reverse[1] ? -1 : 1);
291
292 for (index_t y=0; y<num1; y++) {
293 #pragma omp parallel for
294 for (index_t x=0; x<num0; x++) {
295 const int baseIndex = first0+x*params.multiplier[0]
296 +(first1+y*params.multiplier[1])*myN0;
297 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
298 if (!isnan(values[srcIndex])) {
299 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
300 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
301 const int dataIndex = baseIndex+m0+m1*myN0;
302 double* dest = out.getSampleDataRW(dataIndex);
303 for (index_t q=0; q<dpp; q++) {
304 *dest++ = values[srcIndex];
305 }
306 }
307 }
308 }
309 }
310 }
311 #else
312 throw RipleyException("readNcGrid(): not compiled with netCDF support");
313 #endif
314 }
315
316 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
317 const ReaderParameters& params) const
318 {
319 // the mapping is not universally correct but should work on our
320 // supported platforms
321 switch (params.dataType) {
322 case DATATYPE_INT32:
323 readBinaryGridImpl<int>(out, filename, params);
324 break;
325 case DATATYPE_FLOAT32:
326 readBinaryGridImpl<float>(out, filename, params);
327 break;
328 case DATATYPE_FLOAT64:
329 readBinaryGridImpl<double>(out, filename, params);
330 break;
331 default:
332 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
333 }
334 }
335
336 #ifdef USE_BOOSTIO
337 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
338 const ReaderParameters& params) const
339 {
340 // the mapping is not universally correct but should work on our
341 // supported platforms
342 switch (params.dataType) {
343 case DATATYPE_INT32:
344 readBinaryGridZippedImpl<int>(out, filename, params);
345 break;
346 case DATATYPE_FLOAT32:
347 readBinaryGridZippedImpl<float>(out, filename, params);
348 break;
349 case DATATYPE_FLOAT64:
350 readBinaryGridZippedImpl<double>(out, filename, params);
351 break;
352 default:
353 throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
354 }
355 }
356 #endif
357
358 template<typename ValueType>
359 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
360 const ReaderParameters& params) const
361 {
362 // check destination function space
363 int myN0, myN1;
364 if (out.getFunctionSpace().getTypeCode() == Nodes) {
365 myN0 = m_NN[0];
366 myN1 = m_NN[1];
367 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
368 out.getFunctionSpace().getTypeCode() == ReducedElements) {
369 myN0 = m_NE[0];
370 myN1 = m_NE[1];
371 } else
372 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
373
374 // check file existence and size
375 ifstream f(filename.c_str(), ifstream::binary);
376 if (f.fail()) {
377 throw RipleyException("readBinaryGrid(): cannot open file");
378 }
379 f.seekg(0, ios::end);
380 const int numComp = out.getDataPointSize();
381 const int filesize = f.tellg();
382 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
383 if (filesize < reqsize) {
384 f.close();
385 throw RipleyException("readBinaryGrid(): not enough data in file");
386 }
387
388 // check if this rank contributes anything
389 if (params.first[0] >= m_offset[0]+myN0 ||
390 params.first[0]+params.numValues[0] <= m_offset[0] ||
391 params.first[1] >= m_offset[1]+myN1 ||
392 params.first[1]+params.numValues[1] <= m_offset[1]) {
393 f.close();
394 return;
395 }
396
397 // now determine how much this rank has to write
398
399 // first coordinates in data object to write to
400 const int first0 = max(0, params.first[0]-m_offset[0]);
401 const int first1 = max(0, params.first[1]-m_offset[1]);
402 // indices to first value in file
403 const int idx0 = max(0, m_offset[0]-params.first[0]);
404 const int idx1 = max(0, m_offset[1]-params.first[1]);
405 // number of values to read
406 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
407 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
408
409 out.requireWrite();
410 vector<ValueType> values(num0*numComp);
411 const int dpp = out.getNumDataPointsPerSample();
412
413 for (int y=0; y<num1; y++) {
414 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
415 f.seekg(fileofs*sizeof(ValueType));
416 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
417 for (int x=0; x<num0; x++) {
418 const int baseIndex = first0+x*params.multiplier[0]
419 +(first1+y*params.multiplier[1])*myN0;
420 for (int m1=0; m1<params.multiplier[1]; m1++) {
421 for (int m0=0; m0<params.multiplier[0]; m0++) {
422 const int dataIndex = baseIndex+m0+m1*myN0;
423 double* dest = out.getSampleDataRW(dataIndex);
424 for (int c=0; c<numComp; c++) {
425 ValueType val = values[x*numComp+c];
426
427 if (params.byteOrder != BYTEORDER_NATIVE) {
428 char* cval = reinterpret_cast<char*>(&val);
429 // this will alter val!!
430 byte_swap32(cval);
431 }
432 if (!std::isnan(val)) {
433 for (int q=0; q<dpp; q++) {
434 *dest++ = static_cast<double>(val);
435 }
436 }
437 }
438 }
439 }
440 }
441 }
442
443 f.close();
444 }
445
446 #ifdef USE_BOOSTIO
447 template<typename ValueType>
448 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
449 const ReaderParameters& params) const
450 {
451 // check destination function space
452 int myN0, myN1;
453 if (out.getFunctionSpace().getTypeCode() == Nodes) {
454 myN0 = m_NN[0];
455 myN1 = m_NN[1];
456 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
457 out.getFunctionSpace().getTypeCode() == ReducedElements) {
458 myN0 = m_NE[0];
459 myN1 = m_NE[1];
460 } else
461 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
462
463 // check file existence and size
464 ifstream f(filename.c_str(), ifstream::binary);
465 if (f.fail()) {
466 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
467 }
468 f.seekg(0, ios::end);
469 const int numComp = out.getDataPointSize();
470 int filesize = f.tellg();
471 f.seekg(0, ios::beg);
472 std::vector<char> compressed(filesize);
473 f.read((char*)&compressed[0], filesize);
474 f.close();
475 std::vector<char> decompressed = unzip(compressed);
476 filesize = decompressed.size();
477 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
478 if (filesize < reqsize) {
479 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
480 }
481
482 // check if this rank contributes anything
483 if (params.first[0] >= m_offset[0]+myN0 ||
484 params.first[0]+params.numValues[0] <= m_offset[0] ||
485 params.first[1] >= m_offset[1]+myN1 ||
486 params.first[1]+params.numValues[1] <= m_offset[1]) {
487 f.close();
488 return;
489 }
490
491 // now determine how much this rank has to write
492
493 // first coordinates in data object to write to
494 const int first0 = max(0, params.first[0]-m_offset[0]);
495 const int first1 = max(0, params.first[1]-m_offset[1]);
496 // indices to first value in file
497 const int idx0 = max(0, m_offset[0]-params.first[0]);
498 const int idx1 = max(0, m_offset[1]-params.first[1]);
499 // number of values to read
500 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
501 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
502
503 out.requireWrite();
504 vector<ValueType> values(num0*numComp);
505 const int dpp = out.getNumDataPointsPerSample();
506
507 for (int y=0; y<num1; y++) {
508 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
509 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
510 for (int x=0; x<num0; x++) {
511 const int baseIndex = first0+x*params.multiplier[0]
512 +(first1+y*params.multiplier[1])*myN0;
513 for (int m1=0; m1<params.multiplier[1]; m1++) {
514 for (int m0=0; m0<params.multiplier[0]; m0++) {
515 const int dataIndex = baseIndex+m0+m1*myN0;
516 double* dest = out.getSampleDataRW(dataIndex);
517 for (int c=0; c<numComp; c++) {
518 ValueType val = values[x*numComp+c];
519
520 if (params.byteOrder != BYTEORDER_NATIVE) {
521 char* cval = reinterpret_cast<char*>(&val);
522 // this will alter val!!
523 byte_swap32(cval);
524 }
525 if (!std::isnan(val)) {
526 for (int q=0; q<dpp; q++) {
527 *dest++ = static_cast<double>(val);
528 }
529 }
530 }
531 }
532 }
533 }
534 }
535
536 f.close();
537 }
538 #endif
539
540 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
541 int byteOrder, int dataType) const
542 {
543 // the mapping is not universally correct but should work on our
544 // supported platforms
545 switch (dataType) {
546 case DATATYPE_INT32:
547 writeBinaryGridImpl<int>(in, filename, byteOrder);
548 break;
549 case DATATYPE_FLOAT32:
550 writeBinaryGridImpl<float>(in, filename, byteOrder);
551 break;
552 case DATATYPE_FLOAT64:
553 writeBinaryGridImpl<double>(in, filename, byteOrder);
554 break;
555 default:
556 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
557 }
558 }
559
560 template<typename ValueType>
561 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
562 const string& filename, int byteOrder) const
563 {
564 // check function space and determine number of points
565 int myN0, myN1;
566 int totalN0, totalN1;
567 if (in.getFunctionSpace().getTypeCode() == Nodes) {
568 myN0 = m_NN[0];
569 myN1 = m_NN[1];
570 totalN0 = m_gNE[0]+1;
571 totalN1 = m_gNE[1]+1;
572 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
573 in.getFunctionSpace().getTypeCode() == ReducedElements) {
574 myN0 = m_NE[0];
575 myN1 = m_NE[1];
576 totalN0 = m_gNE[0];
577 totalN1 = m_gNE[1];
578 } else
579 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
580
581 const int numComp = in.getDataPointSize();
582 const int dpp = in.getNumDataPointsPerSample();
583
584 if (numComp > 1 || dpp > 1)
585 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
586
587 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
588
589 // from here on we know that each sample consists of one value
590 FileWriter fw;
591 fw.openFile(filename, fileSize);
592 MPIBarrier();
593
594 for (index_t y=0; y<myN1; y++) {
595 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
596 ostringstream oss;
597
598 for (index_t x=0; x<myN0; x++) {
599 const double* sample = in.getSampleDataRO(y*myN0+x);
600 ValueType fvalue = static_cast<ValueType>(*sample);
601 if (byteOrder == BYTEORDER_NATIVE) {
602 oss.write((char*)&fvalue, sizeof(fvalue));
603 } else {
604 char* value = reinterpret_cast<char*>(&fvalue);
605 oss.write(byte_swap32(value), sizeof(fvalue));
606 }
607 }
608 fw.writeAt(oss, fileofs);
609 }
610 fw.close();
611 }
612
613 void Rectangle::dump(const string& fileName) const
614 {
615 #if USE_SILO
616 string fn(fileName);
617 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
618 fn+=".silo";
619 }
620
621 int driver=DB_HDF5;
622 DBfile* dbfile = NULL;
623 const char* blockDirFmt = "/block%04d";
624
625 #ifdef ESYS_MPI
626 PMPIO_baton_t* baton = NULL;
627 const int NUM_SILO_FILES = 1;
628 #endif
629
630 if (m_mpiInfo->size > 1) {
631 #ifdef ESYS_MPI
632 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
633 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
634 PMPIO_DefaultClose, (void*)&driver);
635 // try the fallback driver in case of error
636 if (!baton && driver != DB_PDB) {
637 driver = DB_PDB;
638 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
639 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
640 PMPIO_DefaultClose, (void*)&driver);
641 }
642 if (baton) {
643 char siloPath[64];
644 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
645 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
646 }
647 #endif
648 } else {
649 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
650 getDescription().c_str(), driver);
651 // try the fallback driver in case of error
652 if (!dbfile && driver != DB_PDB) {
653 driver = DB_PDB;
654 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
655 getDescription().c_str(), driver);
656 }
657 char siloPath[64];
658 snprintf(siloPath, 64, blockDirFmt, 0);
659 DBMkDir(dbfile, siloPath);
660 DBSetDir(dbfile, siloPath);
661 }
662
663 if (!dbfile)
664 throw RipleyException("dump: Could not create Silo file");
665
666 /*
667 if (driver==DB_HDF5) {
668 // gzip level 1 already provides good compression with minimal
669 // performance penalty. Some tests showed that gzip levels >3 performed
670 // rather badly on escript data both in terms of time and space
671 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
672 }
673 */
674
675 boost::scoped_ptr<double> x(new double[m_NN[0]]);
676 boost::scoped_ptr<double> y(new double[m_NN[1]]);
677 double* coords[2] = { x.get(), y.get() };
678 #pragma omp parallel
679 {
680 #pragma omp for nowait
681 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
682 coords[0][i0]=getLocalCoordinate(i0, 0);
683 }
684 #pragma omp for nowait
685 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
686 coords[1][i1]=getLocalCoordinate(i1, 1);
687 }
688 }
689 int* dims = const_cast<int*>(getNumNodesPerDim());
690
691 // write mesh
692 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
693 DB_COLLINEAR, NULL);
694
695 // write node ids
696 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
697 NULL, 0, DB_INT, DB_NODECENT, NULL);
698
699 // write element ids
700 dims = const_cast<int*>(getNumElementsPerDim());
701 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
702 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
703
704 // rank 0 writes multimesh and multivar
705 if (m_mpiInfo->rank == 0) {
706 vector<string> tempstrings;
707 vector<char*> names;
708 for (dim_t i=0; i<m_mpiInfo->size; i++) {
709 stringstream path;
710 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
711 tempstrings.push_back(path.str());
712 names.push_back((char*)tempstrings.back().c_str());
713 }
714 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
715 DBSetDir(dbfile, "/");
716 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
717 &types[0], NULL);
718 tempstrings.clear();
719 names.clear();
720 for (dim_t i=0; i<m_mpiInfo->size; i++) {
721 stringstream path;
722 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
723 tempstrings.push_back(path.str());
724 names.push_back((char*)tempstrings.back().c_str());
725 }
726 types.assign(m_mpiInfo->size, DB_QUADVAR);
727 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
728 &types[0], NULL);
729 tempstrings.clear();
730 names.clear();
731 for (dim_t i=0; i<m_mpiInfo->size; i++) {
732 stringstream path;
733 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
734 tempstrings.push_back(path.str());
735 names.push_back((char*)tempstrings.back().c_str());
736 }
737 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
738 &types[0], NULL);
739 }
740
741 if (m_mpiInfo->size > 1) {
742 #ifdef ESYS_MPI
743 PMPIO_HandOffBaton(baton, dbfile);
744 PMPIO_Finish(baton);
745 #endif
746 } else {
747 DBClose(dbfile);
748 }
749
750 #else // USE_SILO
751 throw RipleyException("dump: no Silo support");
752 #endif
753 }
754
755 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
756 {
757 switch (fsType) {
758 case Nodes:
759 case ReducedNodes: // FIXME: reduced
760 return &m_nodeId[0];
761 case DegreesOfFreedom:
762 case ReducedDegreesOfFreedom: // FIXME: reduced
763 return &m_dofId[0];
764 case Elements:
765 case ReducedElements:
766 return &m_elementId[0];
767 case FaceElements:
768 case ReducedFaceElements:
769 return &m_faceId[0];
770 case Points:
771 return &m_diracPointNodeIDs[0];
772 default:
773 break;
774 }
775
776 stringstream msg;
777 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
778 throw RipleyException(msg.str());
779 }
780
781 bool Rectangle::ownSample(int fsType, index_t id) const
782 {
783 if (getMPISize()==1)
784 return true;
785
786 switch (fsType) {
787 case Nodes:
788 case ReducedNodes: // FIXME: reduced
789 return (m_dofMap[id] < getNumDOF());
790 case DegreesOfFreedom:
791 case ReducedDegreesOfFreedom:
792 return true;
793 case Elements:
794 case ReducedElements:
795 // check ownership of element's bottom left node
796 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
797 case FaceElements:
798 case ReducedFaceElements:
799 {
800 // determine which face the sample belongs to before
801 // checking ownership of corresponding element's first node
802 dim_t n=0;
803 for (size_t i=0; i<4; i++) {
804 n+=m_faceCount[i];
805 if (id<n) {
806 index_t k;
807 if (i==1)
808 k=m_NN[0]-2;
809 else if (i==3)
810 k=m_NN[0]*(m_NN[1]-2);
811 else
812 k=0;
813 // determine whether to move right or up
814 const index_t delta=(i/2==0 ? m_NN[0] : 1);
815 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
816 }
817 }
818 return false;
819 }
820 default:
821 break;
822 }
823
824 stringstream msg;
825 msg << "ownSample: invalid function space type " << fsType;
826 throw RipleyException(msg.str());
827 }
828
829 void Rectangle::setToNormal(escript::Data& out) const
830 {
831 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
832 out.requireWrite();
833 #pragma omp parallel
834 {
835 if (m_faceOffset[0] > -1) {
836 #pragma omp for nowait
837 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
838 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
839 // set vector at two quadrature points
840 *o++ = -1.;
841 *o++ = 0.;
842 *o++ = -1.;
843 *o = 0.;
844 }
845 }
846
847 if (m_faceOffset[1] > -1) {
848 #pragma omp for nowait
849 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
850 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
851 // set vector at two quadrature points
852 *o++ = 1.;
853 *o++ = 0.;
854 *o++ = 1.;
855 *o = 0.;
856 }
857 }
858
859 if (m_faceOffset[2] > -1) {
860 #pragma omp for nowait
861 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
862 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
863 // set vector at two quadrature points
864 *o++ = 0.;
865 *o++ = -1.;
866 *o++ = 0.;
867 *o = -1.;
868 }
869 }
870
871 if (m_faceOffset[3] > -1) {
872 #pragma omp for nowait
873 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
874 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
875 // set vector at two quadrature points
876 *o++ = 0.;
877 *o++ = 1.;
878 *o++ = 0.;
879 *o = 1.;
880 }
881 }
882 } // end of parallel section
883 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
884 out.requireWrite();
885 #pragma omp parallel
886 {
887 if (m_faceOffset[0] > -1) {
888 #pragma omp for nowait
889 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
890 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
891 *o++ = -1.;
892 *o = 0.;
893 }
894 }
895
896 if (m_faceOffset[1] > -1) {
897 #pragma omp for nowait
898 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
899 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
900 *o++ = 1.;
901 *o = 0.;
902 }
903 }
904
905 if (m_faceOffset[2] > -1) {
906 #pragma omp for nowait
907 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
908 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
909 *o++ = 0.;
910 *o = -1.;
911 }
912 }
913
914 if (m_faceOffset[3] > -1) {
915 #pragma omp for nowait
916 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
917 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
918 *o++ = 0.;
919 *o = 1.;
920 }
921 }
922 } // end of parallel section
923
924 } else {
925 stringstream msg;
926 msg << "setToNormal: invalid function space type "
927 << out.getFunctionSpace().getTypeCode();
928 throw RipleyException(msg.str());
929 }
930 }
931
932 void Rectangle::setToSize(escript::Data& out) const
933 {
934 if (out.getFunctionSpace().getTypeCode() == Elements
935 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
936 out.requireWrite();
937 const dim_t numQuad=out.getNumDataPointsPerSample();
938 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
939 #pragma omp parallel for
940 for (index_t k = 0; k < getNumElements(); ++k) {
941 double* o = out.getSampleDataRW(k);
942 fill(o, o+numQuad, size);
943 }
944 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
945 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
946 out.requireWrite();
947 const dim_t numQuad=out.getNumDataPointsPerSample();
948 #pragma omp parallel
949 {
950 if (m_faceOffset[0] > -1) {
951 #pragma omp for nowait
952 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
953 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
954 fill(o, o+numQuad, m_dx[1]);
955 }
956 }
957
958 if (m_faceOffset[1] > -1) {
959 #pragma omp for nowait
960 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
961 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
962 fill(o, o+numQuad, m_dx[1]);
963 }
964 }
965
966 if (m_faceOffset[2] > -1) {
967 #pragma omp for nowait
968 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
969 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
970 fill(o, o+numQuad, m_dx[0]);
971 }
972 }
973
974 if (m_faceOffset[3] > -1) {
975 #pragma omp for nowait
976 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
977 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
978 fill(o, o+numQuad, m_dx[0]);
979 }
980 }
981 } // end of parallel section
982
983 } else {
984 stringstream msg;
985 msg << "setToSize: invalid function space type "
986 << out.getFunctionSpace().getTypeCode();
987 throw RipleyException(msg.str());
988 }
989 }
990
991 void Rectangle::Print_Mesh_Info(const bool full) const
992 {
993 RipleyDomain::Print_Mesh_Info(full);
994 if (full) {
995 cout << " Id Coordinates" << endl;
996 cout.precision(15);
997 cout.setf(ios::scientific, ios::floatfield);
998 for (index_t i=0; i < getNumNodes(); i++) {
999 cout << " " << setw(5) << m_nodeId[i]
1000 << " " << getLocalCoordinate(i%m_NN[0], 0)
1001 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
1002 }
1003 }
1004 }
1005
1006
1007 //protected
1008 void Rectangle::assembleCoordinates(escript::Data& arg) const
1009 {
1010 escriptDataC x = arg.getDataC();
1011 int numDim = m_numDim;
1012 if (!isDataPointShapeEqual(&x, 1, &numDim))
1013 throw RipleyException("setToX: Invalid Data object shape");
1014 if (!numSamplesEqual(&x, 1, getNumNodes()))
1015 throw RipleyException("setToX: Illegal number of samples in Data object");
1016
1017 arg.requireWrite();
1018 #pragma omp parallel for
1019 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1020 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1021 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
1022 point[0] = getLocalCoordinate(i0, 0);
1023 point[1] = getLocalCoordinate(i1, 1);
1024 }
1025 }
1026 }
1027
1028 //protected
1029 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
1030 {
1031 const dim_t numComp = in.getDataPointSize();
1032 const double cx0 = .21132486540518711775/m_dx[0];
1033 const double cx1 = .78867513459481288225/m_dx[0];
1034 const double cx2 = 1./m_dx[0];
1035 const double cy0 = .21132486540518711775/m_dx[1];
1036 const double cy1 = .78867513459481288225/m_dx[1];
1037 const double cy2 = 1./m_dx[1];
1038
1039 if (out.getFunctionSpace().getTypeCode() == Elements) {
1040 out.requireWrite();
1041 #pragma omp parallel
1042 {
1043 vector<double> f_00(numComp);
1044 vector<double> f_01(numComp);
1045 vector<double> f_10(numComp);
1046 vector<double> f_11(numComp);
1047 #pragma omp for
1048 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1049 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1050 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1051 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1052 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1053 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1054 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1055 for (index_t i=0; i < numComp; ++i) {
1056 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1057 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1058 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1059 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1060 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1061 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1062 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1063 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1064 } // end of component loop i
1065 } // end of k0 loop
1066 } // end of k1 loop
1067 } // end of parallel section
1068 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1069 out.requireWrite();
1070 #pragma omp parallel
1071 {
1072 vector<double> f_00(numComp);
1073 vector<double> f_01(numComp);
1074 vector<double> f_10(numComp);
1075 vector<double> f_11(numComp);
1076 #pragma omp for
1077 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1078 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1079 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1080 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1081 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1082 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1083 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1084 for (index_t i=0; i < numComp; ++i) {
1085 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1086 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1087 } // end of component loop i
1088 } // end of k0 loop
1089 } // end of k1 loop
1090 } // end of parallel section
1091 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1092 out.requireWrite();
1093 #pragma omp parallel
1094 {
1095 vector<double> f_00(numComp);
1096 vector<double> f_01(numComp);
1097 vector<double> f_10(numComp);
1098 vector<double> f_11(numComp);
1099 if (m_faceOffset[0] > -1) {
1100 #pragma omp for nowait
1101 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1102 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1103 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1104 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1105 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1106 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1107 for (index_t i=0; i < numComp; ++i) {
1108 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1109 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1110 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1111 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1112 } // end of component loop i
1113 } // end of k1 loop
1114 } // end of face 0
1115 if (m_faceOffset[1] > -1) {
1116 #pragma omp for nowait
1117 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1118 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1119 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1120 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1121 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1122 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1123 for (index_t i=0; i < numComp; ++i) {
1124 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1125 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1126 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1127 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1128 } // end of component loop i
1129 } // end of k1 loop
1130 } // end of face 1
1131 if (m_faceOffset[2] > -1) {
1132 #pragma omp for nowait
1133 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1134 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1135 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1136 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1137 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1138 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1139 for (index_t i=0; i < numComp; ++i) {
1140 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1141 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1142 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1143 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1144 } // end of component loop i
1145 } // end of k0 loop
1146 } // end of face 2
1147 if (m_faceOffset[3] > -1) {
1148 #pragma omp for nowait
1149 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1150 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1151 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1152 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1153 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1154 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1155 for (index_t i=0; i < numComp; ++i) {
1156 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1157 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1158 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1159 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1160 } // end of component loop i
1161 } // end of k0 loop
1162 } // end of face 3
1163 } // end of parallel section
1164
1165 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1166 out.requireWrite();
1167 #pragma omp parallel
1168 {
1169 vector<double> f_00(numComp);
1170 vector<double> f_01(numComp);
1171 vector<double> f_10(numComp);
1172 vector<double> f_11(numComp);
1173 if (m_faceOffset[0] > -1) {
1174 #pragma omp for nowait
1175 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1176 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1177 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1178 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1179 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1180 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1181 for (index_t i=0; i < numComp; ++i) {
1182 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1183 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1184 } // end of component loop i
1185 } // end of k1 loop
1186 } // end of face 0
1187 if (m_faceOffset[1] > -1) {
1188 #pragma omp for nowait
1189 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1190 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1191 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1192 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1193 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1194 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1195 for (index_t i=0; i < numComp; ++i) {
1196 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1197 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1198 } // end of component loop i
1199 } // end of k1 loop
1200 } // end of face 1
1201 if (m_faceOffset[2] > -1) {
1202 #pragma omp for nowait
1203 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1204 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1205 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1206 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1207 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1208 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1209 for (index_t i=0; i < numComp; ++i) {
1210 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1211 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1212 } // end of component loop i
1213 } // end of k0 loop
1214 } // end of face 2
1215 if (m_faceOffset[3] > -1) {
1216 #pragma omp for nowait
1217 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1218 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1219 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1220 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1221 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1222 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1223 for (index_t i=0; i < numComp; ++i) {
1224 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1225 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1226 } // end of component loop i
1227 } // end of k0 loop
1228 } // end of face 3
1229 } // end of parallel section
1230 }
1231 }
1232
1233 //protected
1234 void Rectangle::assembleIntegrate(vector<double>& integrals,
1235 const escript::Data& arg) const
1236 {
1237 const dim_t numComp = arg.getDataPointSize();
1238 const index_t left = (m_offset[0]==0 ? 0 : 1);
1239 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1240 const int fs=arg.getFunctionSpace().getTypeCode();
1241 if (fs == Elements && arg.actsExpanded()) {
1242 #pragma omp parallel
1243 {
1244 vector<double> int_local(numComp, 0);
1245 const double w = m_dx[0]*m_dx[1]/4.;
1246 #pragma omp for nowait
1247 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1248 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1249 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1250 for (index_t i=0; i < numComp; ++i) {
1251 const double f0 = f[INDEX2(i,0,numComp)];
1252 const double f1 = f[INDEX2(i,1,numComp)];
1253 const double f2 = f[INDEX2(i,2,numComp)];
1254 const double f3 = f[INDEX2(i,3,numComp)];
1255 int_local[i]+=(f0+f1+f2+f3)*w;
1256 } // end of component loop i
1257 } // end of k0 loop
1258 } // end of k1 loop
1259 #pragma omp critical
1260 for (index_t i=0; i<numComp; i++)
1261 integrals[i]+=int_local[i];
1262 } // end of parallel section
1263
1264 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1265 const double w = m_dx[0]*m_dx[1];
1266 #pragma omp parallel
1267 {
1268 vector<double> int_local(numComp, 0);
1269 #pragma omp for nowait
1270 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1271 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1272 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1273 for (index_t i=0; i < numComp; ++i) {
1274 int_local[i]+=f[i]*w;
1275 }
1276 }
1277 }
1278 #pragma omp critical
1279 for (index_t i=0; i<numComp; i++)
1280 integrals[i]+=int_local[i];
1281 } // end of parallel section
1282
1283 } else if (fs == FaceElements && arg.actsExpanded()) {
1284 #pragma omp parallel
1285 {
1286 vector<double> int_local(numComp, 0);
1287 const double w0 = m_dx[0]/2.;
1288 const double w1 = m_dx[1]/2.;
1289 if (m_faceOffset[0] > -1) {
1290 #pragma omp for nowait
1291 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1292 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1293 for (index_t i=0; i < numComp; ++i) {
1294 const double f0 = f[INDEX2(i,0,numComp)];
1295 const double f1 = f[INDEX2(i,1,numComp)];
1296 int_local[i]+=(f0+f1)*w1;
1297 } // end of component loop i
1298 } // end of k1 loop
1299 }
1300
1301 if (m_faceOffset[1] > -1) {
1302 #pragma omp for nowait
1303 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1304 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1305 for (index_t i=0; i < numComp; ++i) {
1306 const double f0 = f[INDEX2(i,0,numComp)];
1307 const double f1 = f[INDEX2(i,1,numComp)];
1308 int_local[i]+=(f0+f1)*w1;
1309 } // end of component loop i
1310 } // end of k1 loop
1311 }
1312
1313 if (m_faceOffset[2] > -1) {
1314 #pragma omp for nowait
1315 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1316 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1317 for (index_t i=0; i < numComp; ++i) {
1318 const double f0 = f[INDEX2(i,0,numComp)];
1319 const double f1 = f[INDEX2(i,1,numComp)];
1320 int_local[i]+=(f0+f1)*w0;
1321 } // end of component loop i
1322 } // end of k0 loop
1323 }
1324
1325 if (m_faceOffset[3] > -1) {
1326 #pragma omp for nowait
1327 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1328 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1329 for (index_t i=0; i < numComp; ++i) {
1330 const double f0 = f[INDEX2(i,0,numComp)];
1331 const double f1 = f[INDEX2(i,1,numComp)];
1332 int_local[i]+=(f0+f1)*w0;
1333 } // end of component loop i
1334 } // end of k0 loop
1335 }
1336 #pragma omp critical
1337 for (index_t i=0; i<numComp; i++)
1338 integrals[i]+=int_local[i];
1339 } // end of parallel section
1340
1341 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1342 #pragma omp parallel
1343 {
1344 vector<double> int_local(numComp, 0);
1345 if (m_faceOffset[0] > -1) {
1346 #pragma omp for nowait
1347 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1348 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1349 for (index_t i=0; i < numComp; ++i) {
1350 int_local[i]+=f[i]*m_dx[1];
1351 }
1352 }
1353 }
1354
1355 if (m_faceOffset[1] > -1) {
1356 #pragma omp for nowait
1357 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1358 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1359 for (index_t i=0; i < numComp; ++i) {
1360 int_local[i]+=f[i]*m_dx[1];
1361 }
1362 }
1363 }
1364
1365 if (m_faceOffset[2] > -1) {
1366 #pragma omp for nowait
1367 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1368 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1369 for (index_t i=0; i < numComp; ++i) {
1370 int_local[i]+=f[i]*m_dx[0];
1371 }
1372 }
1373 }
1374
1375 if (m_faceOffset[3] > -1) {
1376 #pragma omp for nowait
1377 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1378 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1379 for (index_t i=0; i < numComp; ++i) {
1380 int_local[i]+=f[i]*m_dx[0];
1381 }
1382 }
1383 }
1384
1385 #pragma omp critical
1386 for (index_t i=0; i<numComp; i++)
1387 integrals[i]+=int_local[i];
1388 } // end of parallel section
1389 } // function space selector
1390 }
1391
1392 //protected
1393 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1394 {
1395 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1396 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1397 const int x=node%nDOF0;
1398 const int y=node/nDOF0;
1399 dim_t num=0;
1400 // loop through potential neighbours and add to index if positions are
1401 // within bounds
1402 for (int i1=-1; i1<2; i1++) {
1403 for (int i0=-1; i0<2; i0++) {
1404 // skip node itself
1405 if (i0==0 && i1==0)
1406 continue;
1407 // location of neighbour node
1408 const int nx=x+i0;
1409 const int ny=y+i1;
1410 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1411 index.push_back(ny*nDOF0+nx);
1412 num++;
1413 }
1414 }
1415 }
1416
1417 return num;
1418 }
1419
1420 //protected
1421 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1422 {
1423 const dim_t numComp = in.getDataPointSize();
1424 out.requireWrite();
1425
1426 const index_t left = (m_offset[0]==0 ? 0 : 1);
1427 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1428 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1429 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1430 #pragma omp parallel for
1431 for (index_t i=0; i<nDOF1; i++) {
1432 for (index_t j=0; j<nDOF0; j++) {
1433 const index_t n=j+left+(i+bottom)*m_NN[0];
1434 const double* src=in.getSampleDataRO(n);
1435 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1436 }
1437 }
1438 }
1439
1440 //protected
1441 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1442 {
1443 const dim_t numComp = in.getDataPointSize();
1444 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1445 // expand data object if necessary to be able to grab the whole data
1446 const_cast<escript::Data*>(&in)->expand();
1447 Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0));
1448
1449 const dim_t numDOF = getNumDOF();
1450 out.requireWrite();
1451 const double* buffer = Paso_Coupler_finishCollect(coupler);
1452
1453 #pragma omp parallel for
1454 for (index_t i=0; i<getNumNodes(); i++) {
1455 const double* src=(m_dofMap[i]<numDOF ?
1456 in.getSampleDataRO(m_dofMap[i])
1457 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1458 copy(src, src+numComp, out.getSampleDataRW(i));
1459 }
1460 Paso_Coupler_free(coupler);
1461 }
1462
1463 //private
1464 void Rectangle::populateSampleIds()
1465 {
1466 // degrees of freedom are numbered from left to right, bottom to top in
1467 // each rank, continuing on the next rank (ranks also go left-right,
1468 // bottom-top).
1469 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1470 // helps when writing out data rank after rank.
1471
1472 // build node distribution vector first.
1473 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1474 // constant for all ranks in this implementation
1475 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1476 const dim_t numDOF=getNumDOF();
1477 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1478 m_nodeDistribution[k]=k*numDOF;
1479 }
1480 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1481 m_nodeId.resize(getNumNodes());
1482 m_dofId.resize(numDOF);
1483 m_elementId.resize(getNumElements());
1484
1485 // populate face element counts
1486 //left
1487 if (m_offset[0]==0)
1488 m_faceCount[0]=m_NE[1];
1489 else
1490 m_faceCount[0]=0;
1491 //right
1492 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1493 m_faceCount[1]=m_NE[1];
1494 else
1495 m_faceCount[1]=0;
1496 //bottom
1497 if (m_offset[1]==0)
1498 m_faceCount[2]=m_NE[0];
1499 else
1500 m_faceCount[2]=0;
1501 //top
1502 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1503 m_faceCount[3]=m_NE[0];
1504 else
1505 m_faceCount[3]=0;
1506
1507 m_faceId.resize(getNumFaceElements());
1508
1509 const index_t left = (m_offset[0]==0 ? 0 : 1);
1510 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1511 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1512 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1513
1514 #define globalNodeId(x,y) \
1515 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1516 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1517
1518 // set corner id's outside the parallel region
1519 m_nodeId[0] = globalNodeId(0, 0);
1520 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1521 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1522 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1523 #undef globalNodeId
1524
1525 #pragma omp parallel
1526 {
1527 // populate degrees of freedom and own nodes (identical id)
1528 #pragma omp for nowait
1529 for (dim_t i=0; i<nDOF1; i++) {
1530 for (dim_t j=0; j<nDOF0; j++) {
1531 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1532 const index_t dofIdx=j+i*nDOF0;
1533 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1534 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1535 }
1536 }
1537
1538 // populate the rest of the nodes (shared with other ranks)
1539 if (m_faceCount[0]==0) { // left column
1540 #pragma omp for nowait
1541 for (dim_t i=0; i<nDOF1; i++) {
1542 const index_t nodeIdx=(i+bottom)*m_NN[0];
1543 const index_t dofId=(i+1)*nDOF0-1;
1544 m_nodeId[nodeIdx]
1545 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1546 }
1547 }
1548 if (m_faceCount[1]==0) { // right column
1549 #pragma omp for nowait
1550 for (dim_t i=0; i<nDOF1; i++) {
1551 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1552 const index_t dofId=i*nDOF0;
1553 m_nodeId[nodeIdx]
1554 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1555 }
1556 }
1557 if (m_faceCount[2]==0) { // bottom row
1558 #pragma omp for nowait
1559 for (dim_t i=0; i<nDOF0; i++) {
1560 const index_t nodeIdx=i+left;
1561 const index_t dofId=nDOF0*(nDOF1-1)+i;
1562 m_nodeId[nodeIdx]
1563 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1564 }
1565 }
1566 if (m_faceCount[3]==0) { // top row
1567 #pragma omp for nowait
1568 for (dim_t i=0; i<nDOF0; i++) {
1569 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1570 const index_t dofId=i;
1571 m_nodeId[nodeIdx]
1572 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1573 }
1574 }
1575
1576 // populate element id's
1577 #pragma omp for nowait
1578 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1579 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1580 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1581 }
1582 }
1583
1584 // face elements
1585 #pragma omp for
1586 for (dim_t k=0; k<getNumFaceElements(); k++)
1587 m_faceId[k]=k;
1588 } // end parallel section
1589
1590 m_nodeTags.assign(getNumNodes(), 0);
1591 updateTagsInUse(Nodes);
1592
1593 m_elementTags.assign(getNumElements(), 0);
1594 updateTagsInUse(Elements);
1595
1596 // generate face offset vector and set face tags
1597 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1598 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1599 m_faceOffset.assign(4, -1);
1600 m_faceTags.clear();
1601 index_t offset=0;
1602 for (size_t i=0; i<4; i++) {
1603 if (m_faceCount[i]>0) {
1604 m_faceOffset[i]=offset;
1605 offset+=m_faceCount[i];
1606 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1607 }
1608 }
1609 setTagMap("left", LEFT);
1610 setTagMap("right", RIGHT);
1611 setTagMap("bottom", BOTTOM);
1612 setTagMap("top", TOP);
1613 updateTagsInUse(FaceElements);
1614 }
1615
1616 //private
1617 void Rectangle::createPattern()
1618 {
1619 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1620 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1621 const index_t left = (m_offset[0]==0 ? 0 : 1);
1622 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1623
1624 // populate node->DOF mapping with own degrees of freedom.
1625 // The rest is assigned in the loop further down
1626 m_dofMap.assign(getNumNodes(), 0);
1627 #pragma omp parallel for
1628 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1629 for (index_t j=left; j<left+nDOF0; j++) {
1630 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1631 }
1632 }
1633
1634 // build list of shared components and neighbours by looping through
1635 // all potential neighbouring ranks and checking if positions are
1636 // within bounds
1637 const dim_t numDOF=nDOF0*nDOF1;
1638 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1639 RankVector neighbour;
1640 IndexVector offsetInShared(1,0);
1641 IndexVector sendShared, recvShared;
1642 int numShared=0, expectedShared=0;
1643 const int x=m_mpiInfo->rank%m_NX[0];
1644 const int y=m_mpiInfo->rank/m_NX[0];
1645 if (x > 0)
1646 expectedShared += nDOF1;
1647 if (x < m_NX[0] - 1)
1648 expectedShared += nDOF1;
1649 if (y > 0)
1650 expectedShared += nDOF0;
1651 if (y < m_NX[1] - 1)
1652 expectedShared += nDOF0;
1653 if (x > 0 && y > 0) expectedShared++;
1654 if (x > 0 && y < m_NX[1] - 1) expectedShared++;
1655 if (x < m_NX[0] - 1 && y > 0) expectedShared++;
1656 if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++;
1657
1658 vector<IndexVector> rowIndices(expectedShared);
1659
1660 for (int i1=-1; i1<2; i1++) {
1661 for (int i0=-1; i0<2; i0++) {
1662 // skip this rank
1663 if (i0==0 && i1==0)
1664 continue;
1665 // location of neighbour rank
1666 const int nx=x+i0;
1667 const int ny=y+i1;
1668 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1669 neighbour.push_back(ny*m_NX[0]+nx);
1670 if (i0==0) {
1671 // sharing top or bottom edge
1672 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1673 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1674 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1675 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1676 sendShared.push_back(firstDOF+i);
1677 recvShared.push_back(numDOF+numShared);
1678 if (i>0)
1679 doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared);
1680 doublyLink(colIndices, rowIndices, firstDOF+i, numShared);
1681 if (i<nDOF0-1)
1682 doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared);
1683 m_dofMap[firstNode+i]=numDOF+numShared;
1684 }
1685 } else if (i1==0) {
1686 // sharing left or right edge
1687 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1688 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1689 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1690 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1691 sendShared.push_back(firstDOF+i*nDOF0);
1692 recvShared.push_back(numDOF+numShared);
1693 if (i>0)
1694 doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared);
1695 doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared);
1696 if (i<nDOF1-1)
1697 doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared);
1698 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1699 }
1700 } else {
1701 // sharing a node
1702 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1703 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1704 offsetInShared.push_back(offsetInShared.back()+1);
1705 sendShared.push_back(dof);
1706 recvShared.push_back(numDOF+numShared);
1707 doublyLink(colIndices, rowIndices, dof, numShared);
1708 m_dofMap[node]=numDOF+numShared;
1709 ++numShared;
1710 }
1711 }
1712 }
1713 }
1714
1715 #pragma omp parallel for
1716 for (int i = 0; i < numShared; i++) {
1717 std::sort(rowIndices[i].begin(), rowIndices[i].end());
1718 }
1719
1720 // create connector
1721 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1722 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1723 &offsetInShared[0], 1, 0, m_mpiInfo);
1724 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1725 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1726 &offsetInShared[0], 1, 0, m_mpiInfo);
1727 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1728 Paso_SharedComponents_free(snd_shcomp);
1729 Paso_SharedComponents_free(rcv_shcomp);
1730
1731 // create main and couple blocks
1732 Paso_Pattern *mainPattern = createMainPattern();
1733 Paso_Pattern *colPattern, *rowPattern;
1734 createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern);
1735
1736 // allocate paso distribution
1737 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1738 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1739
1740 // finally create the system matrix
1741 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1742 distribution, distribution, mainPattern, colPattern, rowPattern,
1743 m_connector, m_connector);
1744
1745 Paso_Distribution_free(distribution);
1746
1747 // useful debug output
1748 /*
1749 cout << "--- rcv_shcomp ---" << endl;
1750 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1751 for (size_t i=0; i<neighbour.size(); i++) {
1752 cout << "neighbor[" << i << "]=" << neighbour[i]
1753 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1754 }
1755 for (size_t i=0; i<recvShared.size(); i++) {
1756 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1757 }
1758 cout << "--- snd_shcomp ---" << endl;
1759 for (size_t i=0; i<sendShared.size(); i++) {
1760 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1761 }
1762 cout << "--- dofMap ---" << endl;
1763 for (size_t i=0; i<m_dofMap.size(); i++) {
1764 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1765 }
1766 cout << "--- colIndices ---" << endl;
1767 for (size_t i=0; i<colIndices.size(); i++) {
1768 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1769 }
1770 */
1771
1772 /*
1773 cout << "--- main_pattern ---" << endl;
1774 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1775 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1776 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1777 }
1778 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1779 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1780 }
1781 */
1782
1783 /*
1784 cout << "--- colCouple_pattern ---" << endl;
1785 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1786 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1787 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1788 }
1789 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1790 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1791 }
1792 */
1793
1794 /*
1795 cout << "--- rowCouple_pattern ---" << endl;
1796 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1797 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1798 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1799 }
1800 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1801 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1802 }
1803 */
1804
1805 Paso_Pattern_free(mainPattern);
1806 Paso_Pattern_free(colPattern);
1807 Paso_Pattern_free(rowPattern);
1808 }
1809
1810 //private
1811 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1812 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1813 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1814 {
1815 IndexVector rowIndex;
1816 rowIndex.push_back(m_dofMap[firstNode]);
1817 rowIndex.push_back(m_dofMap[firstNode+1]);
1818 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1819 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1820 if (addF) {
1821 double *F_p=F.getSampleDataRW(0);
1822 for (index_t i=0; i<rowIndex.size(); i++) {
1823 if (rowIndex[i]<getNumDOF()) {
1824 for (index_t eq=0; eq<nEq; eq++) {
1825 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1826 }
1827 }
1828 }
1829 }
1830 if (addS) {
1831 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1832 }
1833 }
1834
1835 //protected
1836 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1837 const escript::Data& in,
1838 bool reduced) const
1839 {
1840 const dim_t numComp = in.getDataPointSize();
1841 if (reduced) {
1842 out.requireWrite();
1843 const double c0 = 0.25;
1844 #pragma omp parallel
1845 {
1846 vector<double> f_00(numComp);
1847 vector<double> f_01(numComp);
1848 vector<double> f_10(numComp);
1849 vector<double> f_11(numComp);
1850 #pragma omp for
1851 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1852 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1853 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1854 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1855 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1856 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1857 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1858 for (index_t i=0; i < numComp; ++i) {
1859 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1860 } /* end of component loop i */
1861 } /* end of k0 loop */
1862 } /* end of k1 loop */
1863 } /* end of parallel section */
1864 } else {
1865 out.requireWrite();
1866 const double c0 = 0.16666666666666666667;
1867 const double c1 = 0.044658198738520451079;
1868 const double c2 = 0.62200846792814621559;
1869 #pragma omp parallel
1870 {
1871 vector<double> f_00(numComp);
1872 vector<double> f_01(numComp);
1873 vector<double> f_10(numComp);
1874 vector<double> f_11(numComp);
1875 #pragma omp for
1876 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1877 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1878 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1879 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1880 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1881 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1882 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1883 for (index_t i=0; i < numComp; ++i) {
1884 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1885 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1886 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1887 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1888 } /* end of component loop i */
1889 } /* end of k0 loop */
1890 } /* end of k1 loop */
1891 } /* end of parallel section */
1892 }
1893 }
1894
1895 //protected
1896 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1897 const escript::Data& in,
1898 bool reduced) const
1899 {
1900 const dim_t numComp = in.getDataPointSize();
1901 if (reduced) {
1902 out.requireWrite();
1903 #pragma omp parallel
1904 {
1905 vector<double> f_00(numComp);
1906 vector<double> f_01(numComp);
1907 vector<double> f_10(numComp);
1908 vector<double> f_11(numComp);
1909 if (m_faceOffset[0] > -1) {
1910 #pragma omp for nowait
1911 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1912 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1913 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1914 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1915 for (index_t i=0; i < numComp; ++i) {
1916 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1917 } /* end of component loop i */
1918 } /* end of k1 loop */
1919 } /* end of face 0 */
1920 if (m_faceOffset[1] > -1) {
1921 #pragma omp for nowait
1922 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1923 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1924 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1925 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1926 for (index_t i=0; i < numComp; ++i) {
1927 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1928 } /* end of component loop i */
1929 } /* end of k1 loop */
1930 } /* end of face 1 */
1931 if (m_faceOffset[2] > -1) {
1932 #pragma omp for nowait
1933 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1934 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1935 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1936 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1937 for (index_t i=0; i < numComp; ++i) {
1938 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1939 } /* end of component loop i */
1940 } /* end of k0 loop */
1941 } /* end of face 2 */
1942 if (m_faceOffset[3] > -1) {
1943 #pragma omp for nowait
1944 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1945 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1946 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1947 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1948 for (index_t i=0; i < numComp; ++i) {
1949 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1950 } /* end of component loop i */
1951 } /* end of k0 loop */
1952 } /* end of face 3 */
1953 } /* end of parallel section */
1954 } else {
1955 out.requireWrite();
1956 const double c0 = 0.21132486540518711775;
1957 const double c1 = 0.78867513459481288225;
1958 #pragma omp parallel
1959 {
1960 vector<double> f_00(numComp);
1961 vector<double> f_01(numComp);
1962 vector<double> f_10(numComp);
1963 vector<double> f_11(numComp);
1964 if (m_faceOffset[0] > -1) {
1965 #pragma omp for nowait
1966 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1967 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1968 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1969 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1970 for (index_t i=0; i < numComp; ++i) {
1971 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1972 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1973 } /* end of component loop i */
1974 } /* end of k1 loop */
1975 } /* end of face 0 */
1976 if (m_faceOffset[1] > -1) {
1977 #pragma omp for nowait
1978 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1979 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1980 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1981 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1982 for (index_t i=0; i < numComp; ++i) {
1983 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1984 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1985 } /* end of component loop i */
1986 } /* end of k1 loop */
1987 } /* end of face 1 */
1988 if (m_faceOffset[2] > -1) {
1989 #pragma omp for nowait
1990 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1991 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1992 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1993 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1994 for (index_t i=0; i < numComp; ++i) {
1995 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1996 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1997 } /* end of component loop i */
1998 } /* end of k0 loop */
1999 } /* end of face 2 */
2000 if (m_faceOffset[3] > -1) {
2001 #pragma omp for nowait
2002 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
2003 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2004 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2005 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
2006 for (index_t i=0; i < numComp; ++i) {
2007 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
2008 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
2009 } /* end of component loop i */
2010 } /* end of k0 loop */
2011 } /* end of face 3 */
2012 } /* end of parallel section */
2013 }
2014 }
2015
2016
2017
2018 namespace
2019 {
2020 // Calculates a guassian blur colvolution matrix for 2D
2021 // See wiki article on the subject
2022 double* get2DGauss(unsigned radius, double sigma)
2023 {
2024 double* arr=new double[(radius*2+1)*(radius*2+1)];
2025 double common=M_1_PI*0.5*1/(sigma*sigma);
2026 double total=0;
2027 int r=static_cast<int>(radius);
2028 for (int y=-r;y<=r;++y)
2029 {
2030 for (int x=-r;x<=r;++x)
2031 {
2032 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
2033 total+=arr[(x+r)+(y+r)*(r*2+1)];
2034 }
2035 }
2036 double invtotal=1/total;
2037 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
2038 {
2039 arr[p]*=invtotal;
2040 }
2041 return arr;
2042 }
2043
2044 // applies conv to source to get a point.
2045 // (xp, yp) are the coords in the source matrix not the destination matrix
2046 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
2047 {
2048 size_t bx=xp-radius, by=yp-radius;
2049 size_t sbase=bx+by*width;
2050 double result=0;
2051 for (int y=0;y<2*radius+1;++y)
2052 {
2053 for (int x=0;x<2*radius+1;++x)
2054 {
2055 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
2056 }
2057 }
2058 return result;
2059 }
2060 }
2061
2062
2063 /* This is a wrapper for filtered (and non-filtered) randoms
2064 * For detailed doco see randomFillWorker
2065 */
2066 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
2067 const escript::FunctionSpace& what,
2068 long seed, const boost::python::tuple& filter) const
2069 {
2070 int numvals=escript::DataTypes::noValues(shape);
2071 if (len(filter)>0 && (numvals!=1))
2072 {
2073 throw RipleyException("Ripley only supports filters for scalar data.");
2074 }
2075 escript::Data res=randomFillWorker(shape, seed, filter);
2076 if (res.getFunctionSpace()!=what)
2077 {
2078 escript::Data r=escript::Data(res, what);
2079 return r;
2080 }
2081 return res;
2082 }
2083
2084
2085 /* This routine produces a Data object filled with smoothed random data.
2086 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
2087 A parameter radius gives the size of the stencil used for the smoothing.
2088 A point on the left hand edge for example, will still require `radius` extra points to the left
2089 in order to complete the stencil.
2090
2091 All local calculation is done on an array called `src`, with
2092 dimensions = ext[0] * ext[1].
2093 Where ext[i]= internal[i]+2*radius.
2094
2095 Now for MPI there is overlap to deal with. We need to share both the overlapping
2096 values themselves but also the external region.
2097
2098 In a hypothetical 1-D case:
2099
2100
2101 1234567
2102 would be split into two ranks thus:
2103 123(4) (4)567 [4 being a shared element]
2104
2105 If the radius is 2. There will be padding elements on the outside:
2106
2107 pp123(4) (4)567pp
2108
2109 To ensure that 4 can be correctly computed on both ranks, values from the other rank
2110 need to be known.
2111
2112 pp123(4)56 23(4)567pp
2113
2114 Now in our case, we wout set all the values 23456 on the left rank and send them to the
2115 right hand rank.
2116
2117 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
2118 inset=2*radius+1
2119 This is to ensure that values at distance `radius` from the shared/overlapped element
2120 that ripley has.
2121
2122
2123 */
2124 escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape,
2125 long seed, const boost::python::tuple& filter) const
2126 {
2127 if (m_numDim!=2)
2128 {
2129 throw RipleyException("Only 2D supported at this time.");
2130 }
2131
2132 unsigned int radius=0; // these are only used by gaussian
2133 double sigma=0.5;
2134
2135 unsigned int numvals=escript::DataTypes::noValues(shape);
2136
2137
2138 if (len(filter)==0)
2139 {
2140 // nothing special required here yet
2141 }
2142 else if (len(filter)==3)
2143 {
2144 boost::python::extract<string> ex(filter[0]);
2145 if (!ex.check() || (ex()!="gaussian"))
2146 {
2147 throw RipleyException("Unsupported random filter");
2148 }
2149 boost::python::extract<unsigned int> ex1(filter[1]);
2150 if (!ex1.check())
2151 {
2152 throw RipleyException("Radius of gaussian filter must be a positive integer.");
2153 }
2154 radius=ex1();
2155 sigma=0.5;
2156 boost::python::extract<double> ex2(filter[2]);
2157 if (!ex2.check() || (sigma=ex2())<=0)
2158 {
2159 throw RipleyException("Sigma must be a postive floating point number.");
2160 }
2161 }
2162 else
2163 {
2164 throw RipleyException("Unsupported random filter for Rectangle.");
2165 }
2166
2167
2168
2169 size_t internal[2];
2170 internal[0]=m_NE[0]+1; // number of points in the internal region
2171 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
2172 size_t ext[2];
2173 ext[0]=internal[0]+2*radius; // includes points we need as input
2174 ext[1]=internal[1]+2*radius; // for smoothing
2175
2176 // now we check to see if the radius is acceptable
2177 // That is, would not cross multiple ranks in MPI
2178
2179 if (2*radius>=internal[0]-4)
2180 {
2181 throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
2182 }
2183 if (2*radius>=internal[1]-4)
2184 {
2185 throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
2186 }
2187
2188 double* src=new double[ext[0]*ext[1]*numvals];
2189 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2190
2191
2192
2193 #ifdef ESYS_MPI
2194 if ((internal[0]<5) || (internal[1]<5))
2195 {
2196 // since the dimensions are equal for all ranks, this exception
2197 // will be thrown on all ranks
2198 throw RipleyException("Random Data in Ripley requries at least five elements per side per rank.");
2199 }
2200 dim_t X=m_mpiInfo->rank%m_NX[0];
2201 dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
2202 #endif
2203
2204 /*
2205 // if we wanted to test a repeating pattern
2206 size_t basex=0;
2207 size_t basey=0;
2208 #ifdef ESYS_MPI
2209 basex=X*m_gNE[0]/m_NX[0];
2210 basey=Y*m_gNE[1]/m_NX[1];
2211 #endif
2212
2213 esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2214 */
2215
2216
2217 #ifdef ESYS_MPI
2218
2219 BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2220 size_t inset=2*radius+2; // Its +2 not +1 because a whole element is shared (and hence
2221 // there is an overlap of two points both of which need to have "radius" points on either side.
2222
2223 size_t xmidlen=ext[0]-2*inset; // how wide is the x-dimension between the two insets
2224 size_t ymidlen=ext[1]-2*inset;
2225
2226 Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2227
2228 MPI_Request reqs[40]; // a non-tight upper bound on how many we need
2229 MPI_Status stats[40];
2230 short rused=0;
2231
2232 messvec incoms;
2233 messvec outcoms;
2234
2235 grid.generateInNeighbours(X, Y, incoms);
2236 grid.generateOutNeighbours(X, Y, outcoms);
2237
2238 block.copyAllToBuffer(src);
2239
2240 int comserr=0;
2241 for (size_t i=0;i<incoms.size();++i)
2242 {
2243 message& m=incoms[i];
2244 comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2245 block.setUsed(m.destbuffid);
2246 }
2247
2248 for (size_t i=0;i<outcoms.size();++i)
2249 {
2250 message& m=outcoms[i];
2251 comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2252 }
2253
2254 if (!comserr)
2255 {
2256 comserr=MPI_Waitall(rused, reqs, stats);
2257 }
2258
2259 if (comserr)
2260 {
2261 // Yes this is throwing an exception as a result of an MPI error.
2262 // and no we don't inform the other ranks that we are doing this.
2263 // however, we have no reason to believe coms work at this point anyway
2264 throw RipleyException("Error in coms for randomFill");
2265 }
2266
2267 block.copyUsedFromBuffer(src);
2268
2269 #endif
2270
2271 if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe
2272 {
2273
2274 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2275 escript::Data resdat(0, shape, fs , true);
2276 // don't need to check for exwrite because we just made it
2277 escript::DataVector& dv=resdat.getExpandedVectorReference();
2278
2279
2280 // now we need to copy values over
2281 for (size_t y=0;y<(internal[1]);++y)
2282 {
2283 for (size_t x=0;x<(internal[0]);++x)
2284 {
2285 for (unsigned int i=0;i<numvals;++i)
2286 {
2287 dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals];
2288 }
2289 }
2290 }
2291 delete[] src;
2292 return resdat;
2293 }
2294 else // filter enabled
2295 {
2296 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2297 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2298 // don't need to check for exwrite because we just made it
2299 escript::DataVector& dv=resdat.getExpandedVectorReference();
2300 double* convolution=get2DGauss(radius, sigma);
2301 for (size_t y=0;y<(internal[1]);++y)
2302 {
2303 for (size_t x=0;x<(internal[0]);++x)
2304 {
2305 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2306
2307 }
2308 }
2309 delete[] convolution;
2310 delete[] src;
2311 return resdat;
2312 }
2313 }
2314
2315 int Rectangle::findNode(const double *coords) const {
2316 const int NOT_MINE = -1;
2317 //is the found element even owned by this rank
2318 // (inside owned or shared elements but will map to an owned element)
2319 for (int dim = 0; dim < m_numDim; dim++) {
2320 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
2321 - m_dx[dim]/2.; //allows for point outside mapping onto node
2322 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
2323 + m_dx[dim]/2.;
2324 if (min > coords[dim] || max < coords[dim]) {
2325 return NOT_MINE;
2326 }
2327 }
2328 // get distance from origin
2329 double x = coords[0] - m_origin[0];
2330 double y = coords[1] - m_origin[1];
2331 // distance in elements
2332 int ex = (int) floor(x / m_dx[0]);
2333 int ey = (int) floor(y / m_dx[1]);
2334 // set the min distance high enough to be outside the element plus a bit
2335 int closest = NOT_MINE;
2336 double minDist = 1;
2337 for (int dim = 0; dim < m_numDim; dim++) {
2338 minDist += m_dx[dim]*m_dx[dim];
2339 }
2340 //find the closest node
2341 for (int dx = 0; dx < 1; dx++) {
2342 double xdist = (x - (ex + dx)*m_dx[0]);
2343 for (int dy = 0; dy < 1; dy++) {
2344 double ydist = (y - (ey + dy)*m_dx[1]);
2345 double total = xdist*xdist + ydist*ydist;
2346 if (total < minDist) {
2347 closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);
2348 minDist = total;
2349 }
2350 }
2351 }
2352 //if this happens, we've let a dirac point slip through, which is awful
2353 if (closest == NOT_MINE) {
2354 throw RipleyException("Unable to map appropriate dirac point to a node,"
2355 " implementation problem in Rectangle::findNode()");
2356 }
2357 return closest;
2358 }
2359
2360 void Rectangle::setAssembler(std::string type, std::map<std::string,
2361 escript::Data> constants) {
2362 if (type.compare("WaveAssembler") == 0) {
2363 if (assembler_type != WAVE_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2364 throw RipleyException("Domain already using a different custom assembler");
2365 assembler_type = WAVE_ASSEMBLER;
2366 delete assembler;
2367 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2368 } else if (type.compare("LameAssembler") == 0) {
2369 if (assembler_type != LAME_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2370 throw RipleyException("Domain already using a different custom assembler");
2371 assembler_type = LAME_ASSEMBLER;
2372 delete assembler;
2373 assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
2374 } else { //else ifs would go before this for other types
2375 throw RipleyException("Ripley::Rectangle does not support the"
2376 " requested assembler");
2377 }
2378 }
2379
2380 } // end of namespace ripley
2381

  ViewVC Help
Powered by ViewVC 1.1.26