/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4765 - (show annotations)
Wed Mar 19 00:17:16 2014 UTC (5 years, 7 months ago) by sshaw
File size: 93880 byte(s)
added ripley rectangle reading from compressed files, added unit tests for binaryGrid reads and writes (only single ranks for compressed thus far) (#104)
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <algorithm>
18
19 #include <ripley/Rectangle.h>
20 #include <paso/SystemMatrix.h>
21 #include <esysUtils/esysFileWriter.h>
22 #include <ripley/DefaultAssembler2D.h>
23 #include <ripley/WaveAssembler2D.h>
24 #include <ripley/LameAssembler2D.h>
25 #include <ripley/domainhelpers.h>
26 #include <boost/scoped_array.hpp>
27 #include "esysUtils/EsysRandom.h"
28 #include "blocktools.h"
29
30 #ifdef USE_NETCDF
31 #include <netcdfcpp.h>
32 #endif
33
34 #if USE_SILO
35 #include <silo.h>
36 #ifdef ESYS_MPI
37 #include <pmpio.h>
38 #endif
39 #endif
40
41 #include <iomanip>
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
49 double y1, int d0, int d1,
50 const std::vector<double>& points,
51 const std::vector<int>& tags,
52 const simap_t& tagnamestonums) :
53 RipleyDomain(2)
54 {
55 // ignore subdivision parameters for serial run
56 if (m_mpiInfo->size == 1) {
57 d0=1;
58 d1=1;
59 }
60
61 bool warn=false;
62 std::vector<int> factors;
63 int ranks = m_mpiInfo->size;
64 int epr[2] = {n0,n1};
65 int d[2] = {d0,d1};
66 if (d0<=0 || d1<=0) {
67 for (int i = 0; i < 2; i++) {
68 if (d[i] < 1) {
69 d[i] = 1;
70 continue;
71 }
72 epr[i] = -1; // can no longer be max
73 //remove
74 if (ranks % d[i] != 0) {
75 throw RipleyException("Invalid number of spatial subdivisions");
76 }
77 ranks /= d[i];
78 }
79 factorise(factors, ranks);
80 if (factors.size() != 0) {
81 warn = true;
82 }
83 }
84
85 while (factors.size() > 0) {
86 int i = epr[0] > epr[1] ? 0 : 1;
87 int f = factors.back();
88 factors.pop_back();
89 d[i] *= f;
90 epr[i] /= f;
91 }
92 d0 = d[0]; d1 = d[1];
93
94 // ensure number of subdivisions is valid and nodes can be distributed
95 // among number of ranks
96 if (d0*d1 != m_mpiInfo->size)
97 throw RipleyException("Invalid number of spatial subdivisions");
98
99 if (warn) {
100 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
101 << d1 << "). This may not be optimal!" << endl;
102 }
103
104 double l0 = x1-x0;
105 double l1 = y1-y0;
106 m_dx[0] = l0/n0;
107 m_dx[1] = l1/n1;
108
109 if ((n0+1)%d0 > 0) {
110 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
111 l0=m_dx[0]*n0;
112 cout << "Warning: Adjusted number of elements and length. N0="
113 << n0 << ", l0=" << l0 << endl;
114 }
115 if ((n1+1)%d1 > 0) {
116 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
117 l1=m_dx[1]*n1;
118 cout << "Warning: Adjusted number of elements and length. N1="
119 << n1 << ", l1=" << l1 << endl;
120 }
121
122 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
123 throw RipleyException("Too few elements for the number of ranks");
124
125 m_gNE[0] = n0;
126 m_gNE[1] = n1;
127 m_origin[0] = x0;
128 m_origin[1] = y0;
129 m_length[0] = l0;
130 m_length[1] = l1;
131 m_NX[0] = d0;
132 m_NX[1] = d1;
133
134 // local number of elements (with and without overlap)
135 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
136 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
137 m_NE[0]++;
138 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
139 m_ownNE[0]--;
140
141 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
142 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
143 m_NE[1]++;
144 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
145 m_ownNE[1]--;
146
147 // local number of nodes
148 m_NN[0] = m_NE[0]+1;
149 m_NN[1] = m_NE[1]+1;
150
151 // bottom-left node is at (offset0,offset1) in global mesh
152 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
153 if (m_offset[0] > 0)
154 m_offset[0]--;
155 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
156 if (m_offset[1] > 0)
157 m_offset[1]--;
158
159 populateSampleIds();
160 createPattern();
161 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
162 for (map<string, int>::const_iterator i = tagnamestonums.begin();
163 i != tagnamestonums.end(); i++) {
164 setTagMap(i->first, i->second);
165 }
166 addPoints(tags.size(), &points[0], &tags[0]);
167 }
168
169 Rectangle::~Rectangle()
170 {
171 Paso_SystemMatrixPattern_free(m_pattern);
172 Paso_Connector_free(m_connector);
173 delete assembler;
174 }
175
176 string Rectangle::getDescription() const
177 {
178 return "ripley::Rectangle";
179 }
180
181 bool Rectangle::operator==(const AbstractDomain& other) const
182 {
183 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
184 if (o) {
185 return (RipleyDomain::operator==(other) &&
186 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
187 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
188 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
189 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
190 }
191
192 return false;
193 }
194
195 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
196 const ReaderParameters& params) const
197 {
198 #ifdef USE_NETCDF
199 // check destination function space
200 int myN0, myN1;
201 if (out.getFunctionSpace().getTypeCode() == Nodes) {
202 myN0 = m_NN[0];
203 myN1 = m_NN[1];
204 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
205 out.getFunctionSpace().getTypeCode() == ReducedElements) {
206 myN0 = m_NE[0];
207 myN1 = m_NE[1];
208 } else
209 throw RipleyException("readNcGrid(): invalid function space for output data object");
210
211 if (params.first.size() != 2)
212 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
213
214 if (params.numValues.size() != 2)
215 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
216
217 if (params.multiplier.size() != 2)
218 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
219 for (size_t i=0; i<params.multiplier.size(); i++)
220 if (params.multiplier[i]<1)
221 throw RipleyException("readNcGrid(): all multipliers must be positive");
222 if (params.reverse.size() != 2)
223 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
224
225 // check file existence and size
226 NcFile f(filename.c_str(), NcFile::ReadOnly);
227 if (!f.is_valid())
228 throw RipleyException("readNcGrid(): cannot open file");
229
230 NcVar* var = f.get_var(varname.c_str());
231 if (!var)
232 throw RipleyException("readNcGrid(): invalid variable");
233
234 // TODO: rank>0 data support
235 const int numComp = out.getDataPointSize();
236 if (numComp > 1)
237 throw RipleyException("readNcGrid(): only scalar data supported");
238
239 const int dims = var->num_dims();
240 boost::scoped_array<long> edges(var->edges());
241
242 // is this a slice of the data object (dims!=2)?
243 // note the expected ordering of edges (as in numpy: y,x)
244 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
245 || (dims==1 && params.numValues[1]>1) ) {
246 throw RipleyException("readNcGrid(): not enough data in file");
247 }
248
249 // check if this rank contributes anything
250 if (params.first[0] >= m_offset[0]+myN0 ||
251 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
252 params.first[1] >= m_offset[1]+myN1 ||
253 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
254 return;
255
256 // now determine how much this rank has to write
257
258 // first coordinates in data object to write to
259 const int first0 = max(0, params.first[0]-m_offset[0]);
260 const int first1 = max(0, params.first[1]-m_offset[1]);
261 // indices to first value in file (not accounting for reverse yet)
262 int idx0 = max(0, m_offset[0]-params.first[0]);
263 int idx1 = max(0, m_offset[1]-params.first[1]);
264 // number of values to read
265 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
266 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
267
268 // make sure we read the right block if going backwards through file
269 if (params.reverse[0])
270 idx0 = edges[dims-1]-num0-idx0;
271 if (dims>1 && params.reverse[1])
272 idx1 = edges[dims-2]-num1-idx1;
273
274 vector<double> values(num0*num1);
275 if (dims==2) {
276 var->set_cur(idx1, idx0);
277 var->get(&values[0], num1, num0);
278 } else {
279 var->set_cur(idx0);
280 var->get(&values[0], num0);
281 }
282
283 const int dpp = out.getNumDataPointsPerSample();
284 out.requireWrite();
285
286 // helpers for reversing
287 const int x0 = (params.reverse[0] ? num0-1 : 0);
288 const int x_mult = (params.reverse[0] ? -1 : 1);
289 const int y0 = (params.reverse[1] ? num1-1 : 0);
290 const int y_mult = (params.reverse[1] ? -1 : 1);
291
292 for (index_t y=0; y<num1; y++) {
293 #pragma omp parallel for
294 for (index_t x=0; x<num0; x++) {
295 const int baseIndex = first0+x*params.multiplier[0]
296 +(first1+y*params.multiplier[1])*myN0;
297 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
298 if (!isnan(values[srcIndex])) {
299 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
300 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
301 const int dataIndex = baseIndex+m0+m1*myN0;
302 double* dest = out.getSampleDataRW(dataIndex);
303 for (index_t q=0; q<dpp; q++) {
304 *dest++ = values[srcIndex];
305 }
306 }
307 }
308 }
309 }
310 }
311 #else
312 throw RipleyException("readNcGrid(): not compiled with netCDF support");
313 #endif
314 }
315
316 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
317 const ReaderParameters& params) const
318 {
319 // the mapping is not universally correct but should work on our
320 // supported platforms
321 switch (params.dataType) {
322 case DATATYPE_INT32:
323 readBinaryGridImpl<int>(out, filename, params);
324 break;
325 case DATATYPE_FLOAT32:
326 readBinaryGridImpl<float>(out, filename, params);
327 break;
328 case DATATYPE_FLOAT64:
329 readBinaryGridImpl<double>(out, filename, params);
330 break;
331 default:
332 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
333 }
334 }
335
336 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
337 const ReaderParameters& params) const
338 {
339 // the mapping is not universally correct but should work on our
340 // supported platforms
341 switch (params.dataType) {
342 case DATATYPE_INT32:
343 readBinaryGridZippedImpl<int>(out, filename, params);
344 break;
345 case DATATYPE_FLOAT32:
346 readBinaryGridZippedImpl<float>(out, filename, params);
347 break;
348 case DATATYPE_FLOAT64:
349 readBinaryGridZippedImpl<double>(out, filename, params);
350 break;
351 default:
352 throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
353 }
354 }
355
356 template<typename ValueType>
357 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
358 const ReaderParameters& params) const
359 {
360 // check destination function space
361 int myN0, myN1;
362 if (out.getFunctionSpace().getTypeCode() == Nodes) {
363 myN0 = m_NN[0];
364 myN1 = m_NN[1];
365 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
366 out.getFunctionSpace().getTypeCode() == ReducedElements) {
367 myN0 = m_NE[0];
368 myN1 = m_NE[1];
369 } else
370 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
371
372 // check file existence and size
373 ifstream f(filename.c_str(), ifstream::binary);
374 if (f.fail()) {
375 throw RipleyException("readBinaryGrid(): cannot open file");
376 }
377 f.seekg(0, ios::end);
378 const int numComp = out.getDataPointSize();
379 const int filesize = f.tellg();
380 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
381 if (filesize < reqsize) {
382 f.close();
383 throw RipleyException("readBinaryGrid(): not enough data in file");
384 }
385
386 // check if this rank contributes anything
387 if (params.first[0] >= m_offset[0]+myN0 ||
388 params.first[0]+params.numValues[0] <= m_offset[0] ||
389 params.first[1] >= m_offset[1]+myN1 ||
390 params.first[1]+params.numValues[1] <= m_offset[1]) {
391 f.close();
392 return;
393 }
394
395 // now determine how much this rank has to write
396
397 // first coordinates in data object to write to
398 const int first0 = max(0, params.first[0]-m_offset[0]);
399 const int first1 = max(0, params.first[1]-m_offset[1]);
400 // indices to first value in file
401 const int idx0 = max(0, m_offset[0]-params.first[0]);
402 const int idx1 = max(0, m_offset[1]-params.first[1]);
403 // number of values to read
404 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
405 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
406
407 out.requireWrite();
408 vector<ValueType> values(num0*numComp);
409 const int dpp = out.getNumDataPointsPerSample();
410
411 for (int y=0; y<num1; y++) {
412 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
413 f.seekg(fileofs*sizeof(ValueType));
414 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
415 for (int x=0; x<num0; x++) {
416 const int baseIndex = first0+x*params.multiplier[0]
417 +(first1+y*params.multiplier[1])*myN0;
418 for (int m1=0; m1<params.multiplier[1]; m1++) {
419 for (int m0=0; m0<params.multiplier[0]; m0++) {
420 const int dataIndex = baseIndex+m0+m1*myN0;
421 double* dest = out.getSampleDataRW(dataIndex);
422 for (int c=0; c<numComp; c++) {
423 ValueType val = values[x*numComp+c];
424
425 if (params.byteOrder != BYTEORDER_NATIVE) {
426 char* cval = reinterpret_cast<char*>(&val);
427 // this will alter val!!
428 byte_swap32(cval);
429 }
430 if (!std::isnan(val)) {
431 for (int q=0; q<dpp; q++) {
432 *dest++ = static_cast<double>(val);
433 }
434 }
435 }
436 }
437 }
438 }
439 }
440
441 f.close();
442 }
443
444 template<typename ValueType>
445 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
446 const ReaderParameters& params) const
447 {
448 // check destination function space
449 int myN0, myN1;
450 if (out.getFunctionSpace().getTypeCode() == Nodes) {
451 myN0 = m_NN[0];
452 myN1 = m_NN[1];
453 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
454 out.getFunctionSpace().getTypeCode() == ReducedElements) {
455 myN0 = m_NE[0];
456 myN1 = m_NE[1];
457 } else
458 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
459
460 // check file existence and size
461 ifstream f(filename.c_str(), ifstream::binary);
462 if (f.fail()) {
463 throw RipleyException("readBinaryGridFromZipped(): cannot open file");
464 }
465 f.seekg(0, ios::end);
466 const int numComp = out.getDataPointSize();
467 int filesize = f.tellg();
468 f.seekg(0, ios::beg);
469 std::vector<char> compressed(filesize);
470 f.read((char*)&compressed[0], filesize);
471 f.close();
472 std::vector<char> decompressed = unzip(compressed);
473 filesize = decompressed.size();
474 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
475 if (filesize < reqsize) {
476 throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
477 }
478
479 // check if this rank contributes anything
480 if (params.first[0] >= m_offset[0]+myN0 ||
481 params.first[0]+params.numValues[0] <= m_offset[0] ||
482 params.first[1] >= m_offset[1]+myN1 ||
483 params.first[1]+params.numValues[1] <= m_offset[1]) {
484 f.close();
485 return;
486 }
487
488 // now determine how much this rank has to write
489
490 // first coordinates in data object to write to
491 const int first0 = max(0, params.first[0]-m_offset[0]);
492 const int first1 = max(0, params.first[1]-m_offset[1]);
493 // indices to first value in file
494 const int idx0 = max(0, m_offset[0]-params.first[0]);
495 const int idx1 = max(0, m_offset[1]-params.first[1]);
496 // number of values to read
497 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
498 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
499
500 out.requireWrite();
501 vector<ValueType> values(num0*numComp);
502 const int dpp = out.getNumDataPointsPerSample();
503
504 for (int y=0; y<num1; y++) {
505 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
506 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
507 for (int x=0; x<num0; x++) {
508 const int baseIndex = first0+x*params.multiplier[0]
509 +(first1+y*params.multiplier[1])*myN0;
510 for (int m1=0; m1<params.multiplier[1]; m1++) {
511 for (int m0=0; m0<params.multiplier[0]; m0++) {
512 const int dataIndex = baseIndex+m0+m1*myN0;
513 double* dest = out.getSampleDataRW(dataIndex);
514 for (int c=0; c<numComp; c++) {
515 ValueType val = values[x*numComp+c];
516
517 if (params.byteOrder != BYTEORDER_NATIVE) {
518 char* cval = reinterpret_cast<char*>(&val);
519 // this will alter val!!
520 byte_swap32(cval);
521 }
522 if (!std::isnan(val)) {
523 for (int q=0; q<dpp; q++) {
524 *dest++ = static_cast<double>(val);
525 }
526 }
527 }
528 }
529 }
530 }
531 }
532
533 f.close();
534 }
535
536 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
537 int byteOrder, int dataType) const
538 {
539 // the mapping is not universally correct but should work on our
540 // supported platforms
541 switch (dataType) {
542 case DATATYPE_INT32:
543 writeBinaryGridImpl<int>(in, filename, byteOrder);
544 break;
545 case DATATYPE_FLOAT32:
546 writeBinaryGridImpl<float>(in, filename, byteOrder);
547 break;
548 case DATATYPE_FLOAT64:
549 writeBinaryGridImpl<double>(in, filename, byteOrder);
550 break;
551 default:
552 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
553 }
554 }
555
556 template<typename ValueType>
557 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
558 const string& filename, int byteOrder) const
559 {
560 // check function space and determine number of points
561 int myN0, myN1;
562 int totalN0, totalN1;
563 if (in.getFunctionSpace().getTypeCode() == Nodes) {
564 myN0 = m_NN[0];
565 myN1 = m_NN[1];
566 totalN0 = m_gNE[0]+1;
567 totalN1 = m_gNE[1]+1;
568 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
569 in.getFunctionSpace().getTypeCode() == ReducedElements) {
570 myN0 = m_NE[0];
571 myN1 = m_NE[1];
572 totalN0 = m_gNE[0];
573 totalN1 = m_gNE[1];
574 } else
575 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
576
577 const int numComp = in.getDataPointSize();
578 const int dpp = in.getNumDataPointsPerSample();
579
580 if (numComp > 1 || dpp > 1)
581 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
582
583 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
584
585 // from here on we know that each sample consists of one value
586 FileWriter fw;
587 fw.openFile(filename, fileSize);
588 MPIBarrier();
589
590 for (index_t y=0; y<myN1; y++) {
591 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
592 ostringstream oss;
593
594 for (index_t x=0; x<myN0; x++) {
595 const double* sample = in.getSampleDataRO(y*myN0+x);
596 ValueType fvalue = static_cast<ValueType>(*sample);
597 if (byteOrder == BYTEORDER_NATIVE) {
598 oss.write((char*)&fvalue, sizeof(fvalue));
599 } else {
600 char* value = reinterpret_cast<char*>(&fvalue);
601 oss.write(byte_swap32(value), sizeof(fvalue));
602 }
603 }
604 fw.writeAt(oss, fileofs);
605 }
606 fw.close();
607 }
608
609 void Rectangle::dump(const string& fileName) const
610 {
611 #if USE_SILO
612 string fn(fileName);
613 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
614 fn+=".silo";
615 }
616
617 int driver=DB_HDF5;
618 DBfile* dbfile = NULL;
619 const char* blockDirFmt = "/block%04d";
620
621 #ifdef ESYS_MPI
622 PMPIO_baton_t* baton = NULL;
623 const int NUM_SILO_FILES = 1;
624 #endif
625
626 if (m_mpiInfo->size > 1) {
627 #ifdef ESYS_MPI
628 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
629 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
630 PMPIO_DefaultClose, (void*)&driver);
631 // try the fallback driver in case of error
632 if (!baton && driver != DB_PDB) {
633 driver = DB_PDB;
634 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
635 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
636 PMPIO_DefaultClose, (void*)&driver);
637 }
638 if (baton) {
639 char siloPath[64];
640 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
641 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
642 }
643 #endif
644 } else {
645 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
646 getDescription().c_str(), driver);
647 // try the fallback driver in case of error
648 if (!dbfile && driver != DB_PDB) {
649 driver = DB_PDB;
650 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
651 getDescription().c_str(), driver);
652 }
653 char siloPath[64];
654 snprintf(siloPath, 64, blockDirFmt, 0);
655 DBMkDir(dbfile, siloPath);
656 DBSetDir(dbfile, siloPath);
657 }
658
659 if (!dbfile)
660 throw RipleyException("dump: Could not create Silo file");
661
662 /*
663 if (driver==DB_HDF5) {
664 // gzip level 1 already provides good compression with minimal
665 // performance penalty. Some tests showed that gzip levels >3 performed
666 // rather badly on escript data both in terms of time and space
667 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
668 }
669 */
670
671 boost::scoped_ptr<double> x(new double[m_NN[0]]);
672 boost::scoped_ptr<double> y(new double[m_NN[1]]);
673 double* coords[2] = { x.get(), y.get() };
674 #pragma omp parallel
675 {
676 #pragma omp for nowait
677 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
678 coords[0][i0]=getLocalCoordinate(i0, 0);
679 }
680 #pragma omp for nowait
681 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
682 coords[1][i1]=getLocalCoordinate(i1, 1);
683 }
684 }
685 int* dims = const_cast<int*>(getNumNodesPerDim());
686
687 // write mesh
688 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
689 DB_COLLINEAR, NULL);
690
691 // write node ids
692 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
693 NULL, 0, DB_INT, DB_NODECENT, NULL);
694
695 // write element ids
696 dims = const_cast<int*>(getNumElementsPerDim());
697 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
698 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
699
700 // rank 0 writes multimesh and multivar
701 if (m_mpiInfo->rank == 0) {
702 vector<string> tempstrings;
703 vector<char*> names;
704 for (dim_t i=0; i<m_mpiInfo->size; i++) {
705 stringstream path;
706 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
707 tempstrings.push_back(path.str());
708 names.push_back((char*)tempstrings.back().c_str());
709 }
710 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
711 DBSetDir(dbfile, "/");
712 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
713 &types[0], NULL);
714 tempstrings.clear();
715 names.clear();
716 for (dim_t i=0; i<m_mpiInfo->size; i++) {
717 stringstream path;
718 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
719 tempstrings.push_back(path.str());
720 names.push_back((char*)tempstrings.back().c_str());
721 }
722 types.assign(m_mpiInfo->size, DB_QUADVAR);
723 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
724 &types[0], NULL);
725 tempstrings.clear();
726 names.clear();
727 for (dim_t i=0; i<m_mpiInfo->size; i++) {
728 stringstream path;
729 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
730 tempstrings.push_back(path.str());
731 names.push_back((char*)tempstrings.back().c_str());
732 }
733 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
734 &types[0], NULL);
735 }
736
737 if (m_mpiInfo->size > 1) {
738 #ifdef ESYS_MPI
739 PMPIO_HandOffBaton(baton, dbfile);
740 PMPIO_Finish(baton);
741 #endif
742 } else {
743 DBClose(dbfile);
744 }
745
746 #else // USE_SILO
747 throw RipleyException("dump: no Silo support");
748 #endif
749 }
750
751 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
752 {
753 switch (fsType) {
754 case Nodes:
755 case ReducedNodes: // FIXME: reduced
756 return &m_nodeId[0];
757 case DegreesOfFreedom:
758 case ReducedDegreesOfFreedom: // FIXME: reduced
759 return &m_dofId[0];
760 case Elements:
761 case ReducedElements:
762 return &m_elementId[0];
763 case FaceElements:
764 case ReducedFaceElements:
765 return &m_faceId[0];
766 case Points:
767 return &m_diracPointNodeIDs[0];
768 default:
769 break;
770 }
771
772 stringstream msg;
773 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
774 throw RipleyException(msg.str());
775 }
776
777 bool Rectangle::ownSample(int fsType, index_t id) const
778 {
779 if (getMPISize()==1)
780 return true;
781
782 switch (fsType) {
783 case Nodes:
784 case ReducedNodes: // FIXME: reduced
785 return (m_dofMap[id] < getNumDOF());
786 case DegreesOfFreedom:
787 case ReducedDegreesOfFreedom:
788 return true;
789 case Elements:
790 case ReducedElements:
791 // check ownership of element's bottom left node
792 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
793 case FaceElements:
794 case ReducedFaceElements:
795 {
796 // determine which face the sample belongs to before
797 // checking ownership of corresponding element's first node
798 dim_t n=0;
799 for (size_t i=0; i<4; i++) {
800 n+=m_faceCount[i];
801 if (id<n) {
802 index_t k;
803 if (i==1)
804 k=m_NN[0]-2;
805 else if (i==3)
806 k=m_NN[0]*(m_NN[1]-2);
807 else
808 k=0;
809 // determine whether to move right or up
810 const index_t delta=(i/2==0 ? m_NN[0] : 1);
811 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
812 }
813 }
814 return false;
815 }
816 default:
817 break;
818 }
819
820 stringstream msg;
821 msg << "ownSample: invalid function space type " << fsType;
822 throw RipleyException(msg.str());
823 }
824
825 void Rectangle::setToNormal(escript::Data& out) const
826 {
827 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
828 out.requireWrite();
829 #pragma omp parallel
830 {
831 if (m_faceOffset[0] > -1) {
832 #pragma omp for nowait
833 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
834 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
835 // set vector at two quadrature points
836 *o++ = -1.;
837 *o++ = 0.;
838 *o++ = -1.;
839 *o = 0.;
840 }
841 }
842
843 if (m_faceOffset[1] > -1) {
844 #pragma omp for nowait
845 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
846 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
847 // set vector at two quadrature points
848 *o++ = 1.;
849 *o++ = 0.;
850 *o++ = 1.;
851 *o = 0.;
852 }
853 }
854
855 if (m_faceOffset[2] > -1) {
856 #pragma omp for nowait
857 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
858 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
859 // set vector at two quadrature points
860 *o++ = 0.;
861 *o++ = -1.;
862 *o++ = 0.;
863 *o = -1.;
864 }
865 }
866
867 if (m_faceOffset[3] > -1) {
868 #pragma omp for nowait
869 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
870 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
871 // set vector at two quadrature points
872 *o++ = 0.;
873 *o++ = 1.;
874 *o++ = 0.;
875 *o = 1.;
876 }
877 }
878 } // end of parallel section
879 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
880 out.requireWrite();
881 #pragma omp parallel
882 {
883 if (m_faceOffset[0] > -1) {
884 #pragma omp for nowait
885 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
886 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
887 *o++ = -1.;
888 *o = 0.;
889 }
890 }
891
892 if (m_faceOffset[1] > -1) {
893 #pragma omp for nowait
894 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
895 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
896 *o++ = 1.;
897 *o = 0.;
898 }
899 }
900
901 if (m_faceOffset[2] > -1) {
902 #pragma omp for nowait
903 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
904 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
905 *o++ = 0.;
906 *o = -1.;
907 }
908 }
909
910 if (m_faceOffset[3] > -1) {
911 #pragma omp for nowait
912 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
913 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
914 *o++ = 0.;
915 *o = 1.;
916 }
917 }
918 } // end of parallel section
919
920 } else {
921 stringstream msg;
922 msg << "setToNormal: invalid function space type "
923 << out.getFunctionSpace().getTypeCode();
924 throw RipleyException(msg.str());
925 }
926 }
927
928 void Rectangle::setToSize(escript::Data& out) const
929 {
930 if (out.getFunctionSpace().getTypeCode() == Elements
931 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
932 out.requireWrite();
933 const dim_t numQuad=out.getNumDataPointsPerSample();
934 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
935 #pragma omp parallel for
936 for (index_t k = 0; k < getNumElements(); ++k) {
937 double* o = out.getSampleDataRW(k);
938 fill(o, o+numQuad, size);
939 }
940 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
941 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
942 out.requireWrite();
943 const dim_t numQuad=out.getNumDataPointsPerSample();
944 #pragma omp parallel
945 {
946 if (m_faceOffset[0] > -1) {
947 #pragma omp for nowait
948 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
949 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
950 fill(o, o+numQuad, m_dx[1]);
951 }
952 }
953
954 if (m_faceOffset[1] > -1) {
955 #pragma omp for nowait
956 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
957 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
958 fill(o, o+numQuad, m_dx[1]);
959 }
960 }
961
962 if (m_faceOffset[2] > -1) {
963 #pragma omp for nowait
964 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
965 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
966 fill(o, o+numQuad, m_dx[0]);
967 }
968 }
969
970 if (m_faceOffset[3] > -1) {
971 #pragma omp for nowait
972 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
973 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
974 fill(o, o+numQuad, m_dx[0]);
975 }
976 }
977 } // end of parallel section
978
979 } else {
980 stringstream msg;
981 msg << "setToSize: invalid function space type "
982 << out.getFunctionSpace().getTypeCode();
983 throw RipleyException(msg.str());
984 }
985 }
986
987 void Rectangle::Print_Mesh_Info(const bool full) const
988 {
989 RipleyDomain::Print_Mesh_Info(full);
990 if (full) {
991 cout << " Id Coordinates" << endl;
992 cout.precision(15);
993 cout.setf(ios::scientific, ios::floatfield);
994 for (index_t i=0; i < getNumNodes(); i++) {
995 cout << " " << setw(5) << m_nodeId[i]
996 << " " << getLocalCoordinate(i%m_NN[0], 0)
997 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
998 }
999 }
1000 }
1001
1002
1003 //protected
1004 void Rectangle::assembleCoordinates(escript::Data& arg) const
1005 {
1006 escriptDataC x = arg.getDataC();
1007 int numDim = m_numDim;
1008 if (!isDataPointShapeEqual(&x, 1, &numDim))
1009 throw RipleyException("setToX: Invalid Data object shape");
1010 if (!numSamplesEqual(&x, 1, getNumNodes()))
1011 throw RipleyException("setToX: Illegal number of samples in Data object");
1012
1013 arg.requireWrite();
1014 #pragma omp parallel for
1015 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
1016 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
1017 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
1018 point[0] = getLocalCoordinate(i0, 0);
1019 point[1] = getLocalCoordinate(i1, 1);
1020 }
1021 }
1022 }
1023
1024 //protected
1025 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
1026 {
1027 const dim_t numComp = in.getDataPointSize();
1028 const double cx0 = .21132486540518711775/m_dx[0];
1029 const double cx1 = .78867513459481288225/m_dx[0];
1030 const double cx2 = 1./m_dx[0];
1031 const double cy0 = .21132486540518711775/m_dx[1];
1032 const double cy1 = .78867513459481288225/m_dx[1];
1033 const double cy2 = 1./m_dx[1];
1034
1035 if (out.getFunctionSpace().getTypeCode() == Elements) {
1036 out.requireWrite();
1037 #pragma omp parallel
1038 {
1039 vector<double> f_00(numComp);
1040 vector<double> f_01(numComp);
1041 vector<double> f_10(numComp);
1042 vector<double> f_11(numComp);
1043 #pragma omp for
1044 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1045 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1046 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1047 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1048 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1049 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1050 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1051 for (index_t i=0; i < numComp; ++i) {
1052 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1053 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1054 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1055 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1056 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1057 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1058 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1059 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1060 } // end of component loop i
1061 } // end of k0 loop
1062 } // end of k1 loop
1063 } // end of parallel section
1064 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
1065 out.requireWrite();
1066 #pragma omp parallel
1067 {
1068 vector<double> f_00(numComp);
1069 vector<double> f_01(numComp);
1070 vector<double> f_10(numComp);
1071 vector<double> f_11(numComp);
1072 #pragma omp for
1073 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1074 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1075 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1076 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1078 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1079 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1080 for (index_t i=0; i < numComp; ++i) {
1081 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1082 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1083 } // end of component loop i
1084 } // end of k0 loop
1085 } // end of k1 loop
1086 } // end of parallel section
1087 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1088 out.requireWrite();
1089 #pragma omp parallel
1090 {
1091 vector<double> f_00(numComp);
1092 vector<double> f_01(numComp);
1093 vector<double> f_10(numComp);
1094 vector<double> f_11(numComp);
1095 if (m_faceOffset[0] > -1) {
1096 #pragma omp for nowait
1097 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1098 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1099 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1100 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1101 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1102 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1103 for (index_t i=0; i < numComp; ++i) {
1104 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1105 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1106 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1107 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1108 } // end of component loop i
1109 } // end of k1 loop
1110 } // end of face 0
1111 if (m_faceOffset[1] > -1) {
1112 #pragma omp for nowait
1113 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1114 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1115 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1116 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1117 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1118 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1119 for (index_t i=0; i < numComp; ++i) {
1120 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1121 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1122 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1123 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1124 } // end of component loop i
1125 } // end of k1 loop
1126 } // end of face 1
1127 if (m_faceOffset[2] > -1) {
1128 #pragma omp for nowait
1129 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1130 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1131 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1132 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1133 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1134 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1135 for (index_t i=0; i < numComp; ++i) {
1136 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1137 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1138 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1139 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1140 } // end of component loop i
1141 } // end of k0 loop
1142 } // end of face 2
1143 if (m_faceOffset[3] > -1) {
1144 #pragma omp for nowait
1145 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1146 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1147 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1148 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1149 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1150 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1151 for (index_t i=0; i < numComp; ++i) {
1152 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1153 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1154 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1155 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1156 } // end of component loop i
1157 } // end of k0 loop
1158 } // end of face 3
1159 } // end of parallel section
1160
1161 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1162 out.requireWrite();
1163 #pragma omp parallel
1164 {
1165 vector<double> f_00(numComp);
1166 vector<double> f_01(numComp);
1167 vector<double> f_10(numComp);
1168 vector<double> f_11(numComp);
1169 if (m_faceOffset[0] > -1) {
1170 #pragma omp for nowait
1171 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1172 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1173 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1174 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1175 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1176 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1177 for (index_t i=0; i < numComp; ++i) {
1178 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1179 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1180 } // end of component loop i
1181 } // end of k1 loop
1182 } // end of face 0
1183 if (m_faceOffset[1] > -1) {
1184 #pragma omp for nowait
1185 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1186 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1187 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1188 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1189 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1190 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1191 for (index_t i=0; i < numComp; ++i) {
1192 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1193 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1194 } // end of component loop i
1195 } // end of k1 loop
1196 } // end of face 1
1197 if (m_faceOffset[2] > -1) {
1198 #pragma omp for nowait
1199 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1200 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1201 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1202 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1203 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1204 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1205 for (index_t i=0; i < numComp; ++i) {
1206 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1207 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1208 } // end of component loop i
1209 } // end of k0 loop
1210 } // end of face 2
1211 if (m_faceOffset[3] > -1) {
1212 #pragma omp for nowait
1213 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1214 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1215 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1216 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1217 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1218 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1219 for (index_t i=0; i < numComp; ++i) {
1220 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1221 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1222 } // end of component loop i
1223 } // end of k0 loop
1224 } // end of face 3
1225 } // end of parallel section
1226 }
1227 }
1228
1229 //protected
1230 void Rectangle::assembleIntegrate(vector<double>& integrals,
1231 const escript::Data& arg) const
1232 {
1233 const dim_t numComp = arg.getDataPointSize();
1234 const index_t left = (m_offset[0]==0 ? 0 : 1);
1235 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1236 const int fs=arg.getFunctionSpace().getTypeCode();
1237 if (fs == Elements && arg.actsExpanded()) {
1238 #pragma omp parallel
1239 {
1240 vector<double> int_local(numComp, 0);
1241 const double w = m_dx[0]*m_dx[1]/4.;
1242 #pragma omp for nowait
1243 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1244 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1245 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1246 for (index_t i=0; i < numComp; ++i) {
1247 const double f0 = f[INDEX2(i,0,numComp)];
1248 const double f1 = f[INDEX2(i,1,numComp)];
1249 const double f2 = f[INDEX2(i,2,numComp)];
1250 const double f3 = f[INDEX2(i,3,numComp)];
1251 int_local[i]+=(f0+f1+f2+f3)*w;
1252 } // end of component loop i
1253 } // end of k0 loop
1254 } // end of k1 loop
1255 #pragma omp critical
1256 for (index_t i=0; i<numComp; i++)
1257 integrals[i]+=int_local[i];
1258 } // end of parallel section
1259
1260 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1261 const double w = m_dx[0]*m_dx[1];
1262 #pragma omp parallel
1263 {
1264 vector<double> int_local(numComp, 0);
1265 #pragma omp for nowait
1266 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1267 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1268 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1269 for (index_t i=0; i < numComp; ++i) {
1270 int_local[i]+=f[i]*w;
1271 }
1272 }
1273 }
1274 #pragma omp critical
1275 for (index_t i=0; i<numComp; i++)
1276 integrals[i]+=int_local[i];
1277 } // end of parallel section
1278
1279 } else if (fs == FaceElements && arg.actsExpanded()) {
1280 #pragma omp parallel
1281 {
1282 vector<double> int_local(numComp, 0);
1283 const double w0 = m_dx[0]/2.;
1284 const double w1 = m_dx[1]/2.;
1285 if (m_faceOffset[0] > -1) {
1286 #pragma omp for nowait
1287 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1288 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1289 for (index_t i=0; i < numComp; ++i) {
1290 const double f0 = f[INDEX2(i,0,numComp)];
1291 const double f1 = f[INDEX2(i,1,numComp)];
1292 int_local[i]+=(f0+f1)*w1;
1293 } // end of component loop i
1294 } // end of k1 loop
1295 }
1296
1297 if (m_faceOffset[1] > -1) {
1298 #pragma omp for nowait
1299 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1300 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1301 for (index_t i=0; i < numComp; ++i) {
1302 const double f0 = f[INDEX2(i,0,numComp)];
1303 const double f1 = f[INDEX2(i,1,numComp)];
1304 int_local[i]+=(f0+f1)*w1;
1305 } // end of component loop i
1306 } // end of k1 loop
1307 }
1308
1309 if (m_faceOffset[2] > -1) {
1310 #pragma omp for nowait
1311 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1312 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1313 for (index_t i=0; i < numComp; ++i) {
1314 const double f0 = f[INDEX2(i,0,numComp)];
1315 const double f1 = f[INDEX2(i,1,numComp)];
1316 int_local[i]+=(f0+f1)*w0;
1317 } // end of component loop i
1318 } // end of k0 loop
1319 }
1320
1321 if (m_faceOffset[3] > -1) {
1322 #pragma omp for nowait
1323 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1324 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1325 for (index_t i=0; i < numComp; ++i) {
1326 const double f0 = f[INDEX2(i,0,numComp)];
1327 const double f1 = f[INDEX2(i,1,numComp)];
1328 int_local[i]+=(f0+f1)*w0;
1329 } // end of component loop i
1330 } // end of k0 loop
1331 }
1332 #pragma omp critical
1333 for (index_t i=0; i<numComp; i++)
1334 integrals[i]+=int_local[i];
1335 } // end of parallel section
1336
1337 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1338 #pragma omp parallel
1339 {
1340 vector<double> int_local(numComp, 0);
1341 if (m_faceOffset[0] > -1) {
1342 #pragma omp for nowait
1343 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1344 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1345 for (index_t i=0; i < numComp; ++i) {
1346 int_local[i]+=f[i]*m_dx[1];
1347 }
1348 }
1349 }
1350
1351 if (m_faceOffset[1] > -1) {
1352 #pragma omp for nowait
1353 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1354 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1355 for (index_t i=0; i < numComp; ++i) {
1356 int_local[i]+=f[i]*m_dx[1];
1357 }
1358 }
1359 }
1360
1361 if (m_faceOffset[2] > -1) {
1362 #pragma omp for nowait
1363 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1364 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1365 for (index_t i=0; i < numComp; ++i) {
1366 int_local[i]+=f[i]*m_dx[0];
1367 }
1368 }
1369 }
1370
1371 if (m_faceOffset[3] > -1) {
1372 #pragma omp for nowait
1373 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1374 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1375 for (index_t i=0; i < numComp; ++i) {
1376 int_local[i]+=f[i]*m_dx[0];
1377 }
1378 }
1379 }
1380
1381 #pragma omp critical
1382 for (index_t i=0; i<numComp; i++)
1383 integrals[i]+=int_local[i];
1384 } // end of parallel section
1385 } // function space selector
1386 }
1387
1388 //protected
1389 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1390 {
1391 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1392 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1393 const int x=node%nDOF0;
1394 const int y=node/nDOF0;
1395 dim_t num=0;
1396 // loop through potential neighbours and add to index if positions are
1397 // within bounds
1398 for (int i1=-1; i1<2; i1++) {
1399 for (int i0=-1; i0<2; i0++) {
1400 // skip node itself
1401 if (i0==0 && i1==0)
1402 continue;
1403 // location of neighbour node
1404 const int nx=x+i0;
1405 const int ny=y+i1;
1406 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1407 index.push_back(ny*nDOF0+nx);
1408 num++;
1409 }
1410 }
1411 }
1412
1413 return num;
1414 }
1415
1416 //protected
1417 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1418 {
1419 const dim_t numComp = in.getDataPointSize();
1420 out.requireWrite();
1421
1422 const index_t left = (m_offset[0]==0 ? 0 : 1);
1423 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1424 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1425 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1426 #pragma omp parallel for
1427 for (index_t i=0; i<nDOF1; i++) {
1428 for (index_t j=0; j<nDOF0; j++) {
1429 const index_t n=j+left+(i+bottom)*m_NN[0];
1430 const double* src=in.getSampleDataRO(n);
1431 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1432 }
1433 }
1434 }
1435
1436 //protected
1437 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1438 {
1439 const dim_t numComp = in.getDataPointSize();
1440 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1441 // expand data object if necessary to be able to grab the whole data
1442 const_cast<escript::Data*>(&in)->expand();
1443 Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0));
1444
1445 const dim_t numDOF = getNumDOF();
1446 out.requireWrite();
1447 const double* buffer = Paso_Coupler_finishCollect(coupler);
1448
1449 #pragma omp parallel for
1450 for (index_t i=0; i<getNumNodes(); i++) {
1451 const double* src=(m_dofMap[i]<numDOF ?
1452 in.getSampleDataRO(m_dofMap[i])
1453 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1454 copy(src, src+numComp, out.getSampleDataRW(i));
1455 }
1456 Paso_Coupler_free(coupler);
1457 }
1458
1459 //private
1460 void Rectangle::populateSampleIds()
1461 {
1462 // degrees of freedom are numbered from left to right, bottom to top in
1463 // each rank, continuing on the next rank (ranks also go left-right,
1464 // bottom-top).
1465 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1466 // helps when writing out data rank after rank.
1467
1468 // build node distribution vector first.
1469 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1470 // constant for all ranks in this implementation
1471 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1472 const dim_t numDOF=getNumDOF();
1473 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1474 m_nodeDistribution[k]=k*numDOF;
1475 }
1476 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1477 m_nodeId.resize(getNumNodes());
1478 m_dofId.resize(numDOF);
1479 m_elementId.resize(getNumElements());
1480
1481 // populate face element counts
1482 //left
1483 if (m_offset[0]==0)
1484 m_faceCount[0]=m_NE[1];
1485 else
1486 m_faceCount[0]=0;
1487 //right
1488 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1489 m_faceCount[1]=m_NE[1];
1490 else
1491 m_faceCount[1]=0;
1492 //bottom
1493 if (m_offset[1]==0)
1494 m_faceCount[2]=m_NE[0];
1495 else
1496 m_faceCount[2]=0;
1497 //top
1498 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1499 m_faceCount[3]=m_NE[0];
1500 else
1501 m_faceCount[3]=0;
1502
1503 m_faceId.resize(getNumFaceElements());
1504
1505 const index_t left = (m_offset[0]==0 ? 0 : 1);
1506 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1507 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1508 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1509
1510 #define globalNodeId(x,y) \
1511 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1512 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1513
1514 // set corner id's outside the parallel region
1515 m_nodeId[0] = globalNodeId(0, 0);
1516 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1517 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1518 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1519 #undef globalNodeId
1520
1521 #pragma omp parallel
1522 {
1523 // populate degrees of freedom and own nodes (identical id)
1524 #pragma omp for nowait
1525 for (dim_t i=0; i<nDOF1; i++) {
1526 for (dim_t j=0; j<nDOF0; j++) {
1527 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1528 const index_t dofIdx=j+i*nDOF0;
1529 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1530 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1531 }
1532 }
1533
1534 // populate the rest of the nodes (shared with other ranks)
1535 if (m_faceCount[0]==0) { // left column
1536 #pragma omp for nowait
1537 for (dim_t i=0; i<nDOF1; i++) {
1538 const index_t nodeIdx=(i+bottom)*m_NN[0];
1539 const index_t dofId=(i+1)*nDOF0-1;
1540 m_nodeId[nodeIdx]
1541 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1542 }
1543 }
1544 if (m_faceCount[1]==0) { // right column
1545 #pragma omp for nowait
1546 for (dim_t i=0; i<nDOF1; i++) {
1547 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1548 const index_t dofId=i*nDOF0;
1549 m_nodeId[nodeIdx]
1550 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1551 }
1552 }
1553 if (m_faceCount[2]==0) { // bottom row
1554 #pragma omp for nowait
1555 for (dim_t i=0; i<nDOF0; i++) {
1556 const index_t nodeIdx=i+left;
1557 const index_t dofId=nDOF0*(nDOF1-1)+i;
1558 m_nodeId[nodeIdx]
1559 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1560 }
1561 }
1562 if (m_faceCount[3]==0) { // top row
1563 #pragma omp for nowait
1564 for (dim_t i=0; i<nDOF0; i++) {
1565 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1566 const index_t dofId=i;
1567 m_nodeId[nodeIdx]
1568 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1569 }
1570 }
1571
1572 // populate element id's
1573 #pragma omp for nowait
1574 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1575 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1576 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1577 }
1578 }
1579
1580 // face elements
1581 #pragma omp for
1582 for (dim_t k=0; k<getNumFaceElements(); k++)
1583 m_faceId[k]=k;
1584 } // end parallel section
1585
1586 m_nodeTags.assign(getNumNodes(), 0);
1587 updateTagsInUse(Nodes);
1588
1589 m_elementTags.assign(getNumElements(), 0);
1590 updateTagsInUse(Elements);
1591
1592 // generate face offset vector and set face tags
1593 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1594 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1595 m_faceOffset.assign(4, -1);
1596 m_faceTags.clear();
1597 index_t offset=0;
1598 for (size_t i=0; i<4; i++) {
1599 if (m_faceCount[i]>0) {
1600 m_faceOffset[i]=offset;
1601 offset+=m_faceCount[i];
1602 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1603 }
1604 }
1605 setTagMap("left", LEFT);
1606 setTagMap("right", RIGHT);
1607 setTagMap("bottom", BOTTOM);
1608 setTagMap("top", TOP);
1609 updateTagsInUse(FaceElements);
1610 }
1611
1612 //private
1613 void Rectangle::createPattern()
1614 {
1615 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1616 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1617 const index_t left = (m_offset[0]==0 ? 0 : 1);
1618 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1619
1620 // populate node->DOF mapping with own degrees of freedom.
1621 // The rest is assigned in the loop further down
1622 m_dofMap.assign(getNumNodes(), 0);
1623 #pragma omp parallel for
1624 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1625 for (index_t j=left; j<left+nDOF0; j++) {
1626 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1627 }
1628 }
1629
1630 // build list of shared components and neighbours by looping through
1631 // all potential neighbouring ranks and checking if positions are
1632 // within bounds
1633 const dim_t numDOF=nDOF0*nDOF1;
1634 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1635 RankVector neighbour;
1636 IndexVector offsetInShared(1,0);
1637 IndexVector sendShared, recvShared;
1638 int numShared=0, expectedShared=0;
1639 const int x=m_mpiInfo->rank%m_NX[0];
1640 const int y=m_mpiInfo->rank/m_NX[0];
1641 if (x > 0)
1642 expectedShared += nDOF1;
1643 if (x < m_NX[0] - 1)
1644 expectedShared += nDOF1;
1645 if (y > 0)
1646 expectedShared += nDOF0;
1647 if (y < m_NX[1] - 1)
1648 expectedShared += nDOF0;
1649 if (x > 0 && y > 0) expectedShared++;
1650 if (x > 0 && y < m_NX[1] - 1) expectedShared++;
1651 if (x < m_NX[0] - 1 && y > 0) expectedShared++;
1652 if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++;
1653
1654 vector<IndexVector> rowIndices(expectedShared);
1655
1656 for (int i1=-1; i1<2; i1++) {
1657 for (int i0=-1; i0<2; i0++) {
1658 // skip this rank
1659 if (i0==0 && i1==0)
1660 continue;
1661 // location of neighbour rank
1662 const int nx=x+i0;
1663 const int ny=y+i1;
1664 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1665 neighbour.push_back(ny*m_NX[0]+nx);
1666 if (i0==0) {
1667 // sharing top or bottom edge
1668 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1669 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1670 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1671 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1672 sendShared.push_back(firstDOF+i);
1673 recvShared.push_back(numDOF+numShared);
1674 if (i>0)
1675 doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared);
1676 doublyLink(colIndices, rowIndices, firstDOF+i, numShared);
1677 if (i<nDOF0-1)
1678 doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared);
1679 m_dofMap[firstNode+i]=numDOF+numShared;
1680 }
1681 } else if (i1==0) {
1682 // sharing left or right edge
1683 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1684 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1685 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1686 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1687 sendShared.push_back(firstDOF+i*nDOF0);
1688 recvShared.push_back(numDOF+numShared);
1689 if (i>0)
1690 doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared);
1691 doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared);
1692 if (i<nDOF1-1)
1693 doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared);
1694 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1695 }
1696 } else {
1697 // sharing a node
1698 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1699 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1700 offsetInShared.push_back(offsetInShared.back()+1);
1701 sendShared.push_back(dof);
1702 recvShared.push_back(numDOF+numShared);
1703 doublyLink(colIndices, rowIndices, dof, numShared);
1704 m_dofMap[node]=numDOF+numShared;
1705 ++numShared;
1706 }
1707 }
1708 }
1709 }
1710
1711 #pragma omp parallel for
1712 for (int i = 0; i < numShared; i++) {
1713 std::sort(rowIndices[i].begin(), rowIndices[i].end());
1714 }
1715
1716 // create connector
1717 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1718 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1719 &offsetInShared[0], 1, 0, m_mpiInfo);
1720 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1721 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1722 &offsetInShared[0], 1, 0, m_mpiInfo);
1723 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1724 Paso_SharedComponents_free(snd_shcomp);
1725 Paso_SharedComponents_free(rcv_shcomp);
1726
1727 // create main and couple blocks
1728 Paso_Pattern *mainPattern = createMainPattern();
1729 Paso_Pattern *colPattern, *rowPattern;
1730 createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern);
1731
1732 // allocate paso distribution
1733 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1734 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1735
1736 // finally create the system matrix
1737 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1738 distribution, distribution, mainPattern, colPattern, rowPattern,
1739 m_connector, m_connector);
1740
1741 Paso_Distribution_free(distribution);
1742
1743 // useful debug output
1744 /*
1745 cout << "--- rcv_shcomp ---" << endl;
1746 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1747 for (size_t i=0; i<neighbour.size(); i++) {
1748 cout << "neighbor[" << i << "]=" << neighbour[i]
1749 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1750 }
1751 for (size_t i=0; i<recvShared.size(); i++) {
1752 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1753 }
1754 cout << "--- snd_shcomp ---" << endl;
1755 for (size_t i=0; i<sendShared.size(); i++) {
1756 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1757 }
1758 cout << "--- dofMap ---" << endl;
1759 for (size_t i=0; i<m_dofMap.size(); i++) {
1760 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1761 }
1762 cout << "--- colIndices ---" << endl;
1763 for (size_t i=0; i<colIndices.size(); i++) {
1764 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1765 }
1766 */
1767
1768 /*
1769 cout << "--- main_pattern ---" << endl;
1770 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1771 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1772 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1773 }
1774 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1775 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1776 }
1777 */
1778
1779 /*
1780 cout << "--- colCouple_pattern ---" << endl;
1781 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1782 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1783 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1784 }
1785 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1786 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1787 }
1788 */
1789
1790 /*
1791 cout << "--- rowCouple_pattern ---" << endl;
1792 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1793 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1794 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1795 }
1796 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1797 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1798 }
1799 */
1800
1801 Paso_Pattern_free(mainPattern);
1802 Paso_Pattern_free(colPattern);
1803 Paso_Pattern_free(rowPattern);
1804 }
1805
1806 //private
1807 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1808 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1809 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1810 {
1811 IndexVector rowIndex;
1812 rowIndex.push_back(m_dofMap[firstNode]);
1813 rowIndex.push_back(m_dofMap[firstNode+1]);
1814 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1815 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1816 if (addF) {
1817 double *F_p=F.getSampleDataRW(0);
1818 for (index_t i=0; i<rowIndex.size(); i++) {
1819 if (rowIndex[i]<getNumDOF()) {
1820 for (index_t eq=0; eq<nEq; eq++) {
1821 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1822 }
1823 }
1824 }
1825 }
1826 if (addS) {
1827 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1828 }
1829 }
1830
1831 //protected
1832 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1833 const escript::Data& in,
1834 bool reduced) const
1835 {
1836 const dim_t numComp = in.getDataPointSize();
1837 if (reduced) {
1838 out.requireWrite();
1839 const double c0 = 0.25;
1840 #pragma omp parallel
1841 {
1842 vector<double> f_00(numComp);
1843 vector<double> f_01(numComp);
1844 vector<double> f_10(numComp);
1845 vector<double> f_11(numComp);
1846 #pragma omp for
1847 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1848 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1849 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1850 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1851 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1852 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1853 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1854 for (index_t i=0; i < numComp; ++i) {
1855 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1856 } /* end of component loop i */
1857 } /* end of k0 loop */
1858 } /* end of k1 loop */
1859 } /* end of parallel section */
1860 } else {
1861 out.requireWrite();
1862 const double c0 = 0.16666666666666666667;
1863 const double c1 = 0.044658198738520451079;
1864 const double c2 = 0.62200846792814621559;
1865 #pragma omp parallel
1866 {
1867 vector<double> f_00(numComp);
1868 vector<double> f_01(numComp);
1869 vector<double> f_10(numComp);
1870 vector<double> f_11(numComp);
1871 #pragma omp for
1872 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1873 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1874 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1875 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1876 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1877 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1878 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1879 for (index_t i=0; i < numComp; ++i) {
1880 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1881 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1882 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1883 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1884 } /* end of component loop i */
1885 } /* end of k0 loop */
1886 } /* end of k1 loop */
1887 } /* end of parallel section */
1888 }
1889 }
1890
1891 //protected
1892 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1893 const escript::Data& in,
1894 bool reduced) const
1895 {
1896 const dim_t numComp = in.getDataPointSize();
1897 if (reduced) {
1898 out.requireWrite();
1899 #pragma omp parallel
1900 {
1901 vector<double> f_00(numComp);
1902 vector<double> f_01(numComp);
1903 vector<double> f_10(numComp);
1904 vector<double> f_11(numComp);
1905 if (m_faceOffset[0] > -1) {
1906 #pragma omp for nowait
1907 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1908 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1909 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1910 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1911 for (index_t i=0; i < numComp; ++i) {
1912 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1913 } /* end of component loop i */
1914 } /* end of k1 loop */
1915 } /* end of face 0 */
1916 if (m_faceOffset[1] > -1) {
1917 #pragma omp for nowait
1918 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1919 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1920 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1921 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1922 for (index_t i=0; i < numComp; ++i) {
1923 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1924 } /* end of component loop i */
1925 } /* end of k1 loop */
1926 } /* end of face 1 */
1927 if (m_faceOffset[2] > -1) {
1928 #pragma omp for nowait
1929 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1930 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1931 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1932 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1933 for (index_t i=0; i < numComp; ++i) {
1934 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1935 } /* end of component loop i */
1936 } /* end of k0 loop */
1937 } /* end of face 2 */
1938 if (m_faceOffset[3] > -1) {
1939 #pragma omp for nowait
1940 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1941 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1942 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1943 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1944 for (index_t i=0; i < numComp; ++i) {
1945 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1946 } /* end of component loop i */
1947 } /* end of k0 loop */
1948 } /* end of face 3 */
1949 } /* end of parallel section */
1950 } else {
1951 out.requireWrite();
1952 const double c0 = 0.21132486540518711775;
1953 const double c1 = 0.78867513459481288225;
1954 #pragma omp parallel
1955 {
1956 vector<double> f_00(numComp);
1957 vector<double> f_01(numComp);
1958 vector<double> f_10(numComp);
1959 vector<double> f_11(numComp);
1960 if (m_faceOffset[0] > -1) {
1961 #pragma omp for nowait
1962 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1963 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1964 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1965 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1966 for (index_t i=0; i < numComp; ++i) {
1967 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1968 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1969 } /* end of component loop i */
1970 } /* end of k1 loop */
1971 } /* end of face 0 */
1972 if (m_faceOffset[1] > -1) {
1973 #pragma omp for nowait
1974 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1975 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1976 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1977 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1978 for (index_t i=0; i < numComp; ++i) {
1979 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1980 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1981 } /* end of component loop i */
1982 } /* end of k1 loop */
1983 } /* end of face 1 */
1984 if (m_faceOffset[2] > -1) {
1985 #pragma omp for nowait
1986 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1987 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1988 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1989 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1990 for (index_t i=0; i < numComp; ++i) {
1991 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1992 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1993 } /* end of component loop i */
1994 } /* end of k0 loop */
1995 } /* end of face 2 */
1996 if (m_faceOffset[3] > -1) {
1997 #pragma omp for nowait
1998 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1999 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2000 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
2001 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
2002 for (index_t i=0; i < numComp; ++i) {
2003 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
2004 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
2005 } /* end of component loop i */
2006 } /* end of k0 loop */
2007 } /* end of face 3 */
2008 } /* end of parallel section */
2009 }
2010 }
2011
2012
2013
2014 namespace
2015 {
2016 // Calculates a guassian blur colvolution matrix for 2D
2017 // See wiki article on the subject
2018 double* get2DGauss(unsigned radius, double sigma)
2019 {
2020 double* arr=new double[(radius*2+1)*(radius*2+1)];
2021 double common=M_1_PI*0.5*1/(sigma*sigma);
2022 double total=0;
2023 int r=static_cast<int>(radius);
2024 for (int y=-r;y<=r;++y)
2025 {
2026 for (int x=-r;x<=r;++x)
2027 {
2028 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
2029 total+=arr[(x+r)+(y+r)*(r*2+1)];
2030 }
2031 }
2032 double invtotal=1/total;
2033 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
2034 {
2035 arr[p]*=invtotal;
2036 }
2037 return arr;
2038 }
2039
2040 // applies conv to source to get a point.
2041 // (xp, yp) are the coords in the source matrix not the destination matrix
2042 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
2043 {
2044 size_t bx=xp-radius, by=yp-radius;
2045 size_t sbase=bx+by*width;
2046 double result=0;
2047 for (int y=0;y<2*radius+1;++y)
2048 {
2049 for (int x=0;x<2*radius+1;++x)
2050 {
2051 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
2052 }
2053 }
2054 return result;
2055 }
2056 }
2057
2058
2059 /* This is a wrapper for filtered (and non-filtered) randoms
2060 * For detailed doco see randomFillWorker
2061 */
2062 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
2063 const escript::FunctionSpace& what,
2064 long seed, const boost::python::tuple& filter) const
2065 {
2066 int numvals=escript::DataTypes::noValues(shape);
2067 if (len(filter)>0 && (numvals!=1))
2068 {
2069 throw RipleyException("Ripley only supports filters for scalar data.");
2070 }
2071 escript::Data res=randomFillWorker(shape, seed, filter);
2072 if (res.getFunctionSpace()!=what)
2073 {
2074 escript::Data r=escript::Data(res, what);
2075 return r;
2076 }
2077 return res;
2078 }
2079
2080
2081 /* This routine produces a Data object filled with smoothed random data.
2082 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
2083 A parameter radius gives the size of the stencil used for the smoothing.
2084 A point on the left hand edge for example, will still require `radius` extra points to the left
2085 in order to complete the stencil.
2086
2087 All local calculation is done on an array called `src`, with
2088 dimensions = ext[0] * ext[1].
2089 Where ext[i]= internal[i]+2*radius.
2090
2091 Now for MPI there is overlap to deal with. We need to share both the overlapping
2092 values themselves but also the external region.
2093
2094 In a hypothetical 1-D case:
2095
2096
2097 1234567
2098 would be split into two ranks thus:
2099 123(4) (4)567 [4 being a shared element]
2100
2101 If the radius is 2. There will be padding elements on the outside:
2102
2103 pp123(4) (4)567pp
2104
2105 To ensure that 4 can be correctly computed on both ranks, values from the other rank
2106 need to be known.
2107
2108 pp123(4)56 23(4)567pp
2109
2110 Now in our case, we wout set all the values 23456 on the left rank and send them to the
2111 right hand rank.
2112
2113 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
2114 inset=2*radius+1
2115 This is to ensure that values at distance `radius` from the shared/overlapped element
2116 that ripley has.
2117
2118
2119 */
2120 escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape,
2121 long seed, const boost::python::tuple& filter) const
2122 {
2123 if (m_numDim!=2)
2124 {
2125 throw RipleyException("Only 2D supported at this time.");
2126 }
2127
2128 unsigned int radius=0; // these are only used by gaussian
2129 double sigma=0.5;
2130
2131 unsigned int numvals=escript::DataTypes::noValues(shape);
2132
2133
2134 if (len(filter)==0)
2135 {
2136 // nothing special required here yet
2137 }
2138 else if (len(filter)==3)
2139 {
2140 boost::python::extract<string> ex(filter[0]);
2141 if (!ex.check() || (ex()!="gaussian"))
2142 {
2143 throw RipleyException("Unsupported random filter");
2144 }
2145 boost::python::extract<unsigned int> ex1(filter[1]);
2146 if (!ex1.check())
2147 {
2148 throw RipleyException("Radius of gaussian filter must be a positive integer.");
2149 }
2150 radius=ex1();
2151 sigma=0.5;
2152 boost::python::extract<double> ex2(filter[2]);
2153 if (!ex2.check() || (sigma=ex2())<=0)
2154 {
2155 throw RipleyException("Sigma must be a postive floating point number.");
2156 }
2157 }
2158 else
2159 {
2160 throw RipleyException("Unsupported random filter for Rectangle.");
2161 }
2162
2163
2164
2165 size_t internal[2];
2166 internal[0]=m_NE[0]+1; // number of points in the internal region
2167 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
2168 size_t ext[2];
2169 ext[0]=internal[0]+2*radius; // includes points we need as input
2170 ext[1]=internal[1]+2*radius; // for smoothing
2171
2172 // now we check to see if the radius is acceptable
2173 // That is, would not cross multiple ranks in MPI
2174
2175 if (2*radius>=internal[0]-4)
2176 {
2177 throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
2178 }
2179 if (2*radius>=internal[1]-4)
2180 {
2181 throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
2182 }
2183
2184 double* src=new double[ext[0]*ext[1]*numvals];
2185 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2186
2187
2188
2189 #ifdef ESYS_MPI
2190 if ((internal[0]<5) || (internal[1]<5))
2191 {
2192 // since the dimensions are equal for all ranks, this exception
2193 // will be thrown on all ranks
2194 throw RipleyException("Random Data in Ripley requries at least five elements per side per rank.");
2195 }
2196 dim_t X=m_mpiInfo->rank%m_NX[0];
2197 dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
2198 #endif
2199
2200 /*
2201 // if we wanted to test a repeating pattern
2202 size_t basex=0;
2203 size_t basey=0;
2204 #ifdef ESYS_MPI
2205 basex=X*m_gNE[0]/m_NX[0];
2206 basey=Y*m_gNE[1]/m_NX[1];
2207 #endif
2208
2209 esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2210 */
2211
2212
2213 #ifdef ESYS_MPI
2214
2215 BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2216 size_t inset=2*radius+2; // Its +2 not +1 because a whole element is shared (and hence
2217 // there is an overlap of two points both of which need to have "radius" points on either side.
2218
2219 size_t xmidlen=ext[0]-2*inset; // how wide is the x-dimension between the two insets
2220 size_t ymidlen=ext[1]-2*inset;
2221
2222 Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2223
2224 MPI_Request reqs[40]; // a non-tight upper bound on how many we need
2225 MPI_Status stats[40];
2226 short rused=0;
2227
2228 messvec incoms;
2229 messvec outcoms;
2230
2231 grid.generateInNeighbours(X, Y, incoms);
2232 grid.generateOutNeighbours(X, Y, outcoms);
2233
2234 block.copyAllToBuffer(src);
2235
2236 int comserr=0;
2237 for (size_t i=0;i<incoms.size();++i)
2238 {
2239 message& m=incoms[i];
2240 comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2241 block.setUsed(m.destbuffid);
2242 }
2243
2244 for (size_t i=0;i<outcoms.size();++i)
2245 {
2246 message& m=outcoms[i];
2247 comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2248 }
2249
2250 if (!comserr)
2251 {
2252 comserr=MPI_Waitall(rused, reqs, stats);
2253 }
2254
2255 if (comserr)
2256 {
2257 // Yes this is throwing an exception as a result of an MPI error.
2258 // and no we don't inform the other ranks that we are doing this.
2259 // however, we have no reason to believe coms work at this point anyway
2260 throw RipleyException("Error in coms for randomFill");
2261 }
2262
2263 block.copyUsedFromBuffer(src);
2264
2265 #endif
2266
2267 if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe
2268 {
2269
2270 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2271 escript::Data resdat(0, shape, fs , true);
2272 // don't need to check for exwrite because we just made it
2273 escript::DataVector& dv=resdat.getExpandedVectorReference();
2274
2275
2276 // now we need to copy values over
2277 for (size_t y=0;y<(internal[1]);++y)
2278 {
2279 for (size_t x=0;x<(internal[0]);++x)
2280 {
2281 for (unsigned int i=0;i<numvals;++i)
2282 {
2283 dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals];
2284 }
2285 }
2286 }
2287 delete[] src;
2288 return resdat;
2289 }
2290 else // filter enabled
2291 {
2292 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2293 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2294 // don't need to check for exwrite because we just made it
2295 escript::DataVector& dv=resdat.getExpandedVectorReference();
2296 double* convolution=get2DGauss(radius, sigma);
2297 for (size_t y=0;y<(internal[1]);++y)
2298 {
2299 for (size_t x=0;x<(internal[0]);++x)
2300 {
2301 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2302
2303 }
2304 }
2305 delete[] convolution;
2306 delete[] src;
2307 return resdat;
2308 }
2309 }
2310
2311 int Rectangle::findNode(const double *coords) const {
2312 const int NOT_MINE = -1;
2313 //is the found element even owned by this rank
2314 // (inside owned or shared elements but will map to an owned element)
2315 for (int dim = 0; dim < m_numDim; dim++) {
2316 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
2317 - m_dx[dim]/2.; //allows for point outside mapping onto node
2318 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
2319 + m_dx[dim]/2.;
2320 if (min > coords[dim] || max < coords[dim]) {
2321 return NOT_MINE;
2322 }
2323 }
2324 // get distance from origin
2325 double x = coords[0] - m_origin[0];
2326 double y = coords[1] - m_origin[1];
2327 // distance in elements
2328 int ex = (int) floor(x / m_dx[0]);
2329 int ey = (int) floor(y / m_dx[1]);
2330 // set the min distance high enough to be outside the element plus a bit
2331 int closest = NOT_MINE;
2332 double minDist = 1;
2333 for (int dim = 0; dim < m_numDim; dim++) {
2334 minDist += m_dx[dim]*m_dx[dim];
2335 }
2336 //find the closest node
2337 for (int dx = 0; dx < 1; dx++) {
2338 double xdist = (x - (ex + dx)*m_dx[0]);
2339 for (int dy = 0; dy < 1; dy++) {
2340 double ydist = (y - (ey + dy)*m_dx[1]);
2341 double total = xdist*xdist + ydist*ydist;
2342 if (total < minDist) {
2343 closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);
2344 minDist = total;
2345 }
2346 }
2347 }
2348 //if this happens, we've let a dirac point slip through, which is awful
2349 if (closest == NOT_MINE) {
2350 throw RipleyException("Unable to map appropriate dirac point to a node,"
2351 " implementation problem in Rectangle::findNode()");
2352 }
2353 return closest;
2354 }
2355
2356 void Rectangle::setAssembler(std::string type, std::map<std::string,
2357 escript::Data> constants) {
2358 if (type.compare("WaveAssembler") == 0) {
2359 if (assembler_type != WAVE_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2360 throw RipleyException("Domain already using a different custom assembler");
2361 assembler_type = WAVE_ASSEMBLER;
2362 delete assembler;
2363 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2364 } else if (type.compare("LameAssembler") == 0) {
2365 if (assembler_type != LAME_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2366 throw RipleyException("Domain already using a different custom assembler");
2367 assembler_type = LAME_ASSEMBLER;
2368 delete assembler;
2369 assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
2370 } else { //else ifs would go before this for other types
2371 throw RipleyException("Ripley::Rectangle does not support the"
2372 " requested assembler");
2373 }
2374 }
2375
2376 } // end of namespace ripley
2377

  ViewVC Help
Powered by ViewVC 1.1.26