/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6651 - (show annotations)
Wed Feb 7 02:12:08 2018 UTC (21 months, 1 week ago) by jfenwick
File size: 110117 byte(s)
Make everyone sad by touching all the files

Copyright dates update

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2018 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Rectangle.h>
18 #include <ripley/DefaultAssembler2D.h>
19 #include <ripley/LameAssembler2D.h>
20 #include <ripley/WaveAssembler2D.h>
21 #include <ripley/blocktools.h>
22 #include <ripley/domainhelpers.h>
23
24 #include <escript/FileWriter.h>
25 #include <escript/index.h>
26 #include <escript/Random.h>
27
28 #ifdef ESYS_HAVE_PASO
29 #include <paso/SystemMatrix.h>
30 #endif
31
32 #ifdef ESYS_HAVE_NETCDF
33 #ifdef NETCDF4
34 #include <ncVar.h>
35 #include <ncDim.h>
36 #include <escript/NCHelper.h>
37
38 #else
39 #include <netcdfcpp.h>
40 #endif
41 #endif
42
43
44
45 #ifdef ESYS_HAVE_SILO
46 #include <silo.h>
47 #ifdef ESYS_MPI
48 #include <pmpio.h>
49 #endif
50 #endif
51
52 #include <boost/math/special_functions/fpclassify.hpp> // for isnan
53 #include <boost/scoped_array.hpp>
54
55 #include <iomanip>
56 #include <limits>
57
58 namespace bp = boost::python;
59 namespace bm = boost::math;
60 using escript::AbstractSystemMatrix;
61 using escript::FileWriter;
62 using escript::IOError;
63 using escript::NotImplementedError;
64 using escript::ValueError;
65 using std::vector;
66 using std::string;
67 using std::min;
68 using std::max;
69 using std::copy;
70 using std::ios;
71 using std::fill;
72
73 #ifdef NETCDF4
74 using namespace netCDF;
75 #endif
76
77
78 namespace ripley {
79
/// Constructs a 2D rectangular ripley domain with n0 x n1 elements covering
/// the area [x0,x1] x [y0,y1], optionally distributed over d0 x d1 MPI rank
/// subdivisions.
/// @param n0,n1 global number of elements in each spatial dimension
/// @param x0,y0 coordinates of the lower-left corner
/// @param x1,y1 coordinates of the upper-right corner
/// @param d0,d1 number of process subdivisions per dimension
///              (values <= 0 request automatic subdivision)
/// @param points coordinates of Dirac delta points to add to the domain
/// @param tags tag numbers associated with the Dirac points
/// @param tagnamestonums mapping of tag names to tag numbers to register
/// @param w optional SubWorld this domain belongs to
/// @throws RipleyException if the global node count overflows dim_t
/// @throws ValueError on invalid element counts or subdivisions
Rectangle::Rectangle(dim_t n0, dim_t n1, double x0, double y0, double x1,
                     double y1, int d0, int d1,
                     const vector<double>& points,
                     const vector<int>& tags,
                     const TagMap& tagnamestonums,
                     escript::SubWorld_ptr w) :
    RipleyDomain(2, w)
{
    // guard against overflow of the global node count (n0+1)*(n1+1)
    if (static_cast<long>(n0 + 1) * static_cast<long>(n1 + 1)
            > std::numeric_limits<dim_t>::max())
        throw RipleyException("The number of elements has overflowed, this "
                "limit may be raised in future releases.");

    if (n0 <= 0 || n1 <= 0)
        throw ValueError("Number of elements in each spatial dimension "
                "must be positive");

    // ignore subdivision parameters for serial run
    if (m_mpiInfo->size == 1) {
        d0=1;
        d1=1;
    }

    bool warn = false;
    vector<int> factors;
    int ranks = m_mpiInfo->size;
    dim_t epr[2] = {n0,n1};
    int d[2] = {d0,d1};
    // if either subdivision was left automatic (<= 0), divide out the
    // user-fixed dimensions and factorise the remaining rank count; the
    // factors are then assigned greedily below
    if (d0<=0 || d1<=0) {
        for (int i = 0; i < 2; i++) {
            if (d[i] < 1) {
                d[i] = 1;
                continue;
            }
            epr[i] = -1; // can no longer be max
            //remove
            if (ranks % d[i] != 0) {
                throw ValueError("Invalid number of spatial subdivisions");
            }
            ranks /= d[i];
        }
        factorise(factors, ranks);
        if (factors.size() != 0) {
            warn = true;
        }
    }

    // distribute remaining prime factors, always to the dimension that
    // currently has more elements per rank
    while (factors.size() > 0) {
        int i = epr[0] > epr[1] ? 0 : 1;
        int f = factors.back();
        factors.pop_back();
        d[i] *= f;
        epr[i] /= f;
    }
    d0 = d[0]; d1 = d[1];

    // ensure number of subdivisions is valid and nodes can be distributed
    // among number of ranks
    if (d0*d1 != m_mpiInfo->size)
        throw ValueError("Invalid number of spatial subdivisions");

    if (warn) {
        std::cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
            << d1 << "). This may not be optimal!" << std::endl;
    }

    double l0 = x1-x0;
    double l1 = y1-y0;
    m_dx[0] = l0/n0;
    m_dx[1] = l1/n1;

    warn = false;
    // if the node count is not divisible by the subdivisions, adjust the
    // grid according to the decomposition policy. NOTE: the case
    // fall-throughs are deliberate (EXPAND also adds elements; adding
    // elements also triggers the warning handling below)
    if ((n0+1)%d0 > 0) {
        switch (getDecompositionPolicy()) {
            case DECOMP_EXPAND:
                l0 = m_dx[0]*n0; // fall through
            case DECOMP_ADD_ELEMENTS:
                n0 = (dim_t)round((float)(n0+1)/d0+0.5)*d0-1; // fall through
            case DECOMP_STRICT:
                warn = true;
                break;
        }
        // reset spacing
        m_dx[0] = l0/n0;
    }
    if ((n1+1)%d1 > 0) {
        switch (getDecompositionPolicy()) {
            case DECOMP_EXPAND:
                l1 = m_dx[1]*n1; // fall through
            case DECOMP_ADD_ELEMENTS:
                n1 = (dim_t)round((float)(n1+1)/d1+0.5)*d1-1; // fall through
            case DECOMP_STRICT:
                warn = true;
                break;
        }
        // reset spacing
        m_dx[1] = l1/n1;
    }

    // every rank must own at least 2 elements in a subdivided dimension
    if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
        throw ValueError("Too few elements for the number of ranks");

    if (warn) {
        if (getDecompositionPolicy() == DECOMP_STRICT) {
            throw ValueError("Unable to decompose domain to the number of "
                    "MPI ranks without adding elements and the policy "
                    "is set to STRICT. Use setDecompositionPolicy() "
                    "to allow adding elements.");
        } else {
            std::cout << "Warning: Domain setup has been adjusted as follows "
                    "to allow decomposition into " << m_mpiInfo->size
                << " MPI ranks:" << std::endl
                << " N0=" << n0 << ", l0=" << l0 << std::endl
                << " N1=" << n1 << ", l1=" << l1 << std::endl;
        }
    }
    // store final global grid parameters
    m_gNE[0] = n0;
    m_gNE[1] = n1;
    m_origin[0] = x0;
    m_origin[1] = y0;
    m_length[0] = l0;
    m_length[1] = l1;
    m_NX[0] = d0;
    m_NX[1] = d1;

    // local number of elements (with and without overlap)
    m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
    if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
        m_NE[0]++;
    else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
        m_ownNE[0]--;

    m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
    if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
        m_NE[1]++;
    else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
        m_ownNE[1]--;

    // local number of nodes
    m_NN[0] = m_NE[0]+1;
    m_NN[1] = m_NE[1]+1;

    // bottom-left node is at (offset0,offset1) in global mesh
    m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
    if (m_offset[0] > 0)
        m_offset[0]--;
    m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
    if (m_offset[1] > 0)
        m_offset[1]--;

    populateSampleIds();

    // register tag names, then insert the Dirac points
    for (TagMap::const_iterator i = tagnamestonums.begin();
            i != tagnamestonums.end(); i++) {
        setTagMap(i->first, i->second);
    }
    addPoints(points, tags);
}
238
239 Rectangle::~Rectangle()
240 {
241 }
242
243 string Rectangle::getDescription() const
244 {
245 return "ripley::Rectangle";
246 }
247
248 bool Rectangle::operator==(const escript::AbstractDomain& other) const
249 {
250 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
251 if (o) {
252 return (RipleyDomain::operator==(other) &&
253 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
254 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
255 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
256 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
257 }
258
259 return false;
260 }
261
262 #ifdef NETCDF4
/// Reads a rectangular patch of a (scalar) netCDF variable into 'out',
/// using the netCDF-4 C++ API. Only the part of the grid that intersects
/// this rank's local portion is read and written.
/// @param out data object to populate (Nodes, Elements or ReducedElements)
/// @param filename path of the netCDF file
/// @param varname name of the variable within the file
/// @param params grid origin, extents, per-dimension multipliers and
///               reversal flags describing how file values map onto the grid
/// @throws ValueError/RipleyException/IOError on invalid arguments or file
void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
                           const ReaderParameters& params) const
{
#ifdef ESYS_HAVE_NETCDF
    // check destination function space
    dim_t myN0, myN1;
    if (out.getFunctionSpace().getTypeCode() == Nodes) {
        myN0 = m_NN[0];
        myN1 = m_NN[1];
    } else if (out.getFunctionSpace().getTypeCode() == Elements ||
                out.getFunctionSpace().getTypeCode() == ReducedElements) {
        myN0 = m_NE[0];
        myN1 = m_NE[1];
    } else
        throw ValueError("readNcGrid(): invalid function space for output data object");

    // validate reader parameters: all per-dimension vectors need 2 entries
    if (params.first.size() != 2)
        throw ValueError("readNcGrid(): argument 'first' must have 2 entries");

    if (params.numValues.size() != 2)
        throw ValueError("readNcGrid(): argument 'numValues' must have 2 entries");

    if (params.multiplier.size() != 2)
        throw ValueError("readNcGrid(): argument 'multiplier' must have 2 entries");
    for (size_t i=0; i<params.multiplier.size(); i++)
        if (params.multiplier[i]<1)
            throw ValueError("readNcGrid(): all multipliers must be positive");
    if (params.reverse.size() != 2)
        throw ValueError("readNcGrid(): argument 'reverse' must have 2 entries");

    NcFile f;
    if (!escript::openNcFile(f, filename))
    {
        throw RipleyException("readNcGrid(): cannot open file");
    }

    NcVar var = f.getVar(varname.c_str());
    if (var.isNull())
        throw RipleyException("readNcGrid(): invalid variable name");

    // TODO: rank>0 data support
    const int numComp = out.getDataPointSize();
    if (numComp > 1)
        throw NotImplementedError("readNcGrid(): only scalar data supported");

    // query the extents of the variable in the file
    const int dims = var.getDimCount();
    vector<long> edges(dims);
    std::vector< NcDim > vard=var.getDims();
    for (size_t i=0;i<vard.size();++i)
    {
        edges[i]=vard[i].getSize();
    }

    // is this a slice of the data object (dims!=2)?
    // note the expected ordering of edges (as in numpy: y,x)
    if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
            || (dims==1 && params.numValues[1]>1) ) {
        throw IOError("readNcGrid(): not enough data in file");
    }

    // check if this rank contributes anything
    if (params.first[0] >= m_offset[0]+myN0 ||
            params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
            params.first[1] >= m_offset[1]+myN1 ||
            params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
        return;

    // now determine how much this rank has to write

    // first coordinates in data object to write to
    const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
    const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
    // indices to first value in file (not accounting for reverse yet)
    dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
    dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
    // number of values to read
    const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
    const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);

    // make sure we read the right block if going backwards through file
    if (params.reverse[0])
        idx0 = edges[dims-1]-num0-idx0;
    if (dims>1 && params.reverse[1])
        idx1 = edges[dims-2]-num1-idx1;

    // read the (num1 x num0) block from the file in one call
    vector<double> values(num0*num1);
    vector<size_t> startindex;
    vector<size_t> counts;
    if (dims==2) {
        // var->set_cur(idx1, idx0);
        // var->get(&values[0], num1, num0);
        startindex.push_back(idx1);
        startindex.push_back(idx0);
        counts.push_back(num1);
        counts.push_back(num0);
        var.getVar(startindex, counts, &values[0]);
    } else {
        // var->set_cur(idx0);
        // var->get(&values[0], num0);
        startindex.push_back(idx0);
        counts.push_back(num0);
        var.getVar(startindex, counts, &values[0]);
    }

    const int dpp = out.getNumDataPointsPerSample();
    out.requireWrite();

    // helpers for reversing
    const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
    const int x_mult = (params.reverse[0] ? -1 : 1);
    const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
    const int y_mult = (params.reverse[1] ? -1 : 1);

    // copy each file value into a multiplier[0] x multiplier[1] block of
    // data points, skipping NaNs so existing data is kept there
    for (index_t y=0; y<num1; y++) {
#pragma omp parallel for
        for (index_t x=0; x<num0; x++) {
            const dim_t baseIndex = first0+x*params.multiplier[0]
                                    +(first1+y*params.multiplier[1])*myN0;
            const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
            if (!bm::isnan(values[srcIndex])) {
                for (index_t m1=0; m1<params.multiplier[1]; m1++) {
                    for (index_t m0=0; m0<params.multiplier[0]; m0++) {
                        const dim_t dataIndex = baseIndex+m0+m1*myN0;
                        double* dest = out.getSampleDataRW(dataIndex);
                        for (index_t q=0; q<dpp; q++) {
                            *dest++ = values[srcIndex];
                        }
                    }
                }
            }
        }
    }
#else
    throw RipleyException("readNcGrid(): not compiled with netCDF support");
#endif
}
399
400 #else
401
/// Reads a rectangular patch of a (scalar) netCDF variable into 'out',
/// using the legacy netCDF C++ API (netcdfcpp.h). Behaviour matches the
/// netCDF-4 variant above: only this rank's intersection with the file
/// grid is read.
/// @param out data object to populate (Nodes, Elements or ReducedElements)
/// @param filename path of the netCDF file
/// @param varname name of the variable within the file
/// @param params grid origin, extents, per-dimension multipliers and
///               reversal flags describing how file values map onto the grid
void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
                           const ReaderParameters& params) const
{
#ifdef ESYS_HAVE_NETCDF
    // check destination function space
    dim_t myN0, myN1;
    if (out.getFunctionSpace().getTypeCode() == Nodes) {
        myN0 = m_NN[0];
        myN1 = m_NN[1];
    } else if (out.getFunctionSpace().getTypeCode() == Elements ||
                out.getFunctionSpace().getTypeCode() == ReducedElements) {
        myN0 = m_NE[0];
        myN1 = m_NE[1];
    } else
        throw ValueError("readNcGrid(): invalid function space for output data object");

    // validate reader parameters: all per-dimension vectors need 2 entries
    if (params.first.size() != 2)
        throw ValueError("readNcGrid(): argument 'first' must have 2 entries");

    if (params.numValues.size() != 2)
        throw ValueError("readNcGrid(): argument 'numValues' must have 2 entries");

    if (params.multiplier.size() != 2)
        throw ValueError("readNcGrid(): argument 'multiplier' must have 2 entries");
    for (size_t i=0; i<params.multiplier.size(); i++)
        if (params.multiplier[i]<1)
            throw ValueError("readNcGrid(): all multipliers must be positive");
    if (params.reverse.size() != 2)
        throw ValueError("readNcGrid(): argument 'reverse' must have 2 entries");

    // check file existence and size
    NcFile f(filename.c_str(), NcFile::ReadOnly);
    if (!f.is_valid())
        throw IOError("readNcGrid(): cannot open file");

    NcVar* var = f.get_var(varname.c_str());
    if (!var)
        throw IOError("readNcGrid(): invalid variable");

    // TODO: rank>0 data support
    const int numComp = out.getDataPointSize();
    if (numComp > 1)
        throw NotImplementedError("readNcGrid(): only scalar data supported");

    // query the extents of the variable in the file
    const int dims = var->num_dims();
    boost::scoped_array<long> edges(var->edges());

    // is this a slice of the data object (dims!=2)?
    // note the expected ordering of edges (as in numpy: y,x)
    if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
            || (dims==1 && params.numValues[1]>1) ) {
        throw IOError("readNcGrid(): not enough data in file");
    }

    // check if this rank contributes anything
    if (params.first[0] >= m_offset[0]+myN0 ||
            params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
            params.first[1] >= m_offset[1]+myN1 ||
            params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
        return;

    // now determine how much this rank has to write

    // first coordinates in data object to write to
    const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
    const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
    // indices to first value in file (not accounting for reverse yet)
    dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
    dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
    // number of values to read
    const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
    const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);

    // make sure we read the right block if going backwards through file
    if (params.reverse[0])
        idx0 = edges[dims-1]-num0-idx0;
    if (dims>1 && params.reverse[1])
        idx1 = edges[dims-2]-num1-idx1;

    // read the (num1 x num0) block from the file in one call
    vector<double> values(num0*num1);
    if (dims==2) {
        var->set_cur(idx1, idx0);
        var->get(&values[0], num1, num0);
    } else {
        var->set_cur(idx0);
        var->get(&values[0], num0);
    }

    const int dpp = out.getNumDataPointsPerSample();
    out.requireWrite();

    // helpers for reversing
    const dim_t x0 = (params.reverse[0] ? num0-1 : 0);
    const int x_mult = (params.reverse[0] ? -1 : 1);
    const dim_t y0 = (params.reverse[1] ? num1-1 : 0);
    const int y_mult = (params.reverse[1] ? -1 : 1);

    // copy each file value into a multiplier[0] x multiplier[1] block of
    // data points, skipping NaNs so existing data is kept there
    for (index_t y=0; y<num1; y++) {
#pragma omp parallel for
        for (index_t x=0; x<num0; x++) {
            const dim_t baseIndex = first0+x*params.multiplier[0]
                                    +(first1+y*params.multiplier[1])*myN0;
            const dim_t srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
            if (!bm::isnan(values[srcIndex])) {
                for (index_t m1=0; m1<params.multiplier[1]; m1++) {
                    for (index_t m0=0; m0<params.multiplier[0]; m0++) {
                        const dim_t dataIndex = baseIndex+m0+m1*myN0;
                        double* dest = out.getSampleDataRW(dataIndex);
                        for (index_t q=0; q<dpp; q++) {
                            *dest++ = values[srcIndex];
                        }
                    }
                }
            }
        }
    }
#else
    throw RipleyException("readNcGrid(): not compiled with netCDF support");
#endif
}
522
523 #endif
524
525 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
526 const ReaderParameters& params) const
527 {
528 // the mapping is not universally correct but should work on our
529 // supported platforms
530 switch (params.dataType) {
531 case DATATYPE_INT32:
532 readBinaryGridImpl<int>(out, filename, params);
533 break;
534 case DATATYPE_FLOAT32:
535 readBinaryGridImpl<float>(out, filename, params);
536 break;
537 case DATATYPE_FLOAT64:
538 readBinaryGridImpl<double>(out, filename, params);
539 break;
540 default:
541 throw ValueError("readBinaryGrid(): invalid or unsupported datatype");
542 }
543 }
544
545 void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
546 const ReaderParameters& params) const
547 {
548 #ifdef ESYS_HAVE_BOOST_IO
549 // the mapping is not universally correct but should work on our
550 // supported platforms
551 switch (params.dataType) {
552 case DATATYPE_INT32:
553 readBinaryGridZippedImpl<int>(out, filename, params);
554 break;
555 case DATATYPE_FLOAT32:
556 readBinaryGridZippedImpl<float>(out, filename, params);
557 break;
558 case DATATYPE_FLOAT64:
559 readBinaryGridZippedImpl<double>(out, filename, params);
560 break;
561 default:
562 throw ValueError("readBinaryGridFromZipped(): invalid or unsupported datatype");
563 }
564 #else
565 throw ValueError("readBinaryGridFromZipped(): not compiled with zip support");
566 #endif
567 }
568
/// Reads raw binary grid values of type ValueType from an uncompressed file
/// into 'out'. Each file value is replicated into a multiplier[0] x
/// multiplier[1] block of data points; NaN values leave the destination
/// untouched. Handles non-native byte order but not reversed axes.
/// @param out data object to populate (Nodes, Elements or ReducedElements)
/// @param filename path of the binary file
/// @param params grid origin, extents, multipliers and byte order
/// @throws IOError if the file is missing or too small
template<typename ValueType>
void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
                                   const ReaderParameters& params) const
{
    // check destination function space
    dim_t myN0, myN1;
    if (out.getFunctionSpace().getTypeCode() == Nodes) {
        myN0 = m_NN[0];
        myN1 = m_NN[1];
    } else if (out.getFunctionSpace().getTypeCode() == Elements ||
                out.getFunctionSpace().getTypeCode() == ReducedElements) {
        myN0 = m_NE[0];
        myN1 = m_NE[1];
    } else
        throw ValueError("readBinaryGrid(): invalid function space for output data object");

    // validate reader parameters
    if (params.first.size() != 2)
        throw ValueError("readBinaryGrid(): argument 'first' must have 2 entries");

    if (params.numValues.size() != 2)
        throw ValueError("readBinaryGrid(): argument 'numValues' must have 2 entries");

    if (params.multiplier.size() != 2)
        throw ValueError("readBinaryGrid(): argument 'multiplier' must have 2 entries");
    for (size_t i=0; i<params.multiplier.size(); i++)
        if (params.multiplier[i]<1)
            throw ValueError("readBinaryGrid(): all multipliers must be positive");
    if (params.reverse[0] != 0 || params.reverse[1] != 0)
        throw NotImplementedError("readBinaryGrid(): reversing not supported yet");

    // check file existence and size
    std::ifstream f(filename.c_str(), std::ifstream::binary);
    if (f.fail()) {
        throw IOError("readBinaryGrid(): cannot open file " + filename);
    }
    f.seekg(0, ios::end);
    const int numComp = out.getDataPointSize();
    const dim_t filesize = f.tellg();
    const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
    if (filesize < reqsize) {
        f.close();
        throw IOError("readBinaryGrid(): not enough data in file");
    }

    // check if this rank contributes anything
    if (params.first[0] >= m_offset[0]+myN0 ||
            params.first[0]+(params.numValues[0]*params.multiplier[0]) <= m_offset[0] ||
            params.first[1] >= m_offset[1]+myN1 ||
            params.first[1]+(params.numValues[1]*params.multiplier[1]) <= m_offset[1]) {
        f.close();
        return;
    }

    // now determine how much this rank has to write

    // first coordinates in data object to write to
    const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
    const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
    // indices to first value in file
    const dim_t idx0 = max(dim_t(0), (m_offset[0]/params.multiplier[0])-params.first[0]);
    const dim_t idx1 = max(dim_t(0), (m_offset[1]/params.multiplier[1])-params.first[1]);
    // if restX > 0 the first value in the respective dimension has been
    // written restX times already in a previous rank so this rank only
    // contributes (multiplier-rank) copies of that value
    const dim_t rest0 = m_offset[0]%params.multiplier[0];
    const dim_t rest1 = m_offset[1]%params.multiplier[1];
    // number of values to read
    const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
    const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);

    out.requireWrite();
    vector<ValueType> values(num0*numComp);
    const int dpp = out.getNumDataPointsPerSample();

    // read one row of file values at a time and scatter it into the
    // destination, expanding each value by the multipliers
    for (dim_t y=0; y<num1; y++) {
        const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
        f.seekg(fileofs*sizeof(ValueType));
        f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
        // first row/column may contribute fewer copies (see restX above)
        const dim_t m1limit = (y==0 ? params.multiplier[1]-rest1 : params.multiplier[1]);
        dim_t dataYbase = first1+y*params.multiplier[1];
        if (y>0)
            dataYbase -= rest1;
        for (dim_t x=0; x<num0; x++) {
            const dim_t m0limit = (x==0 ? params.multiplier[0]-rest0 : params.multiplier[0]);
            dim_t dataXbase = first0+x*params.multiplier[0];
            if (x>0)
                dataXbase -= rest0;
            // write a block of mult0 x mult1 identical values into Data object
            for (dim_t m1=0; m1<m1limit; m1++) {
                const dim_t dataY = dataYbase+m1;
                if (dataY >= myN1)
                    break;
                for (dim_t m0=0; m0<m0limit; m0++) {
                    const dim_t dataX = dataXbase+m0;
                    if (dataX >= myN0)
                        break;
                    const dim_t dataIndex = dataX+dataY*myN0;
                    double* dest = out.getSampleDataRW(dataIndex);
                    for (int c=0; c<numComp; c++) {
                        ValueType val = values[x*numComp+c];

                        if (params.byteOrder != BYTEORDER_NATIVE) {
                            char* cval = reinterpret_cast<char*>(&val);
                            // this will alter val!!
                            if (sizeof(ValueType) > 4) {
                                byte_swap64(cval);
                            } else {
                                byte_swap32(cval);
                            }
                        }
                        // NaN means "no data": keep existing values
                        if (!bm::isnan(val)) {
                            for (int q=0; q<dpp; q++) {
                                *dest++ = static_cast<double>(val);
                            }
                        }
                    }
                }
            }
        }
    }

    f.close();
}
692
693 #ifdef ESYS_HAVE_BOOST_IO
694 template<typename ValueType>
695 void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
696 const ReaderParameters& params) const
697 {
698 // check destination function space
699 dim_t myN0, myN1;
700 if (out.getFunctionSpace().getTypeCode() == Nodes) {
701 myN0 = m_NN[0];
702 myN1 = m_NN[1];
703 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
704 out.getFunctionSpace().getTypeCode() == ReducedElements) {
705 myN0 = m_NE[0];
706 myN1 = m_NE[1];
707 } else
708 throw ValueError("readBinaryGrid(): invalid function space for output data object");
709
710 // check file existence and size
711 std::ifstream f(filename.c_str(), std::ifstream::binary);
712 if (f.fail()) {
713 throw IOError("readBinaryGridFromZipped(): cannot open file" + filename);
714 }
715 f.seekg(0, ios::end);
716 const int numComp = out.getDataPointSize();
717 dim_t filesize = f.tellg();
718 f.seekg(0, ios::beg);
719 vector<char> compressed(filesize);
720 f.read((char*)&compressed[0], filesize);
721 f.close();
722 vector<char> decompressed = unzip(compressed);
723 filesize = decompressed.size();
724 const dim_t reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
725 if (filesize < reqsize) {
726 throw IOError("readBinaryGridFromZipped(): not enough data in file");
727 }
728
729 // check if this rank contributes anything
730 if (params.first[0] >= m_offset[0]+myN0 ||
731 params.first[0]+params.numValues[0] <= m_offset[0] ||
732 params.first[1] >= m_offset[1]+myN1 ||
733 params.first[1]+params.numValues[1] <= m_offset[1]) {
734 f.close();
735 return;
736 }
737
738 // now determine how much this rank has to write
739
740 // first coordinates in data object to write to
741 const dim_t first0 = max(dim_t(0), params.first[0]-m_offset[0]);
742 const dim_t first1 = max(dim_t(0), params.first[1]-m_offset[1]);
743 // indices to first value in file
744 const dim_t idx0 = max(dim_t(0), m_offset[0]-params.first[0]);
745 const dim_t idx1 = max(dim_t(0), m_offset[1]-params.first[1]);
746 // number of values to read
747 const dim_t num0 = min(params.numValues[0]-idx0, myN0-first0);
748 const dim_t num1 = min(params.numValues[1]-idx1, myN1-first1);
749
750 out.requireWrite();
751 vector<ValueType> values(num0*numComp);
752 const int dpp = out.getNumDataPointsPerSample();
753
754 for (dim_t y=0; y<num1; y++) {
755 const dim_t fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
756 memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
757 for (dim_t x=0; x<num0; x++) {
758 const dim_t baseIndex = first0+x*params.multiplier[0]
759 +(first1+y*params.multiplier[1])*myN0;
760 for (int m1=0; m1<params.multiplier[1]; m1++) {
761 for (int m0=0; m0<params.multiplier[0]; m0++) {
762 const dim_t dataIndex = baseIndex+m0+m1*myN0;
763 double* dest = out.getSampleDataRW(dataIndex);
764 for (int c=0; c<numComp; c++) {
765 ValueType val = values[x*numComp+c];
766
767 if (params.byteOrder != BYTEORDER_NATIVE) {
768 char* cval = reinterpret_cast<char*>(&val);
769 // this will alter val!!
770 byte_swap32(cval);
771 }
772 if (!bm::isnan(val)) {
773 for (int q=0; q<dpp; q++) {
774 *dest++ = static_cast<double>(val);
775 }
776 }
777 }
778 }
779 }
780 }
781 }
782
783 f.close();
784 }
785 #endif
786
787 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
788 int byteOrder, int dataType) const
789 {
790 // the mapping is not universally correct but should work on our
791 // supported platforms
792 switch (dataType) {
793 case DATATYPE_INT32:
794 writeBinaryGridImpl<int>(in, filename, byteOrder);
795 break;
796 case DATATYPE_FLOAT32:
797 writeBinaryGridImpl<float>(in, filename, byteOrder);
798 break;
799 case DATATYPE_FLOAT64:
800 writeBinaryGridImpl<double>(in, filename, byteOrder);
801 break;
802 default:
803 throw ValueError("writeBinaryGrid(): invalid or unsupported datatype");
804 }
805 }
806
/// Writes the (scalar, single-value) data object 'in' to a raw binary grid
/// file of ValueType, one row at a time via the MPI-aware FileWriter so all
/// ranks write their own portion of the global grid.
/// @param in data object to write (Nodes, DOF/reduced DOF, or elements)
/// @param filename destination path
/// @param byteOrder BYTEORDER_NATIVE or a foreign byte order (values are
///                  byte-swapped before writing)
/// @throws NotImplementedError for data with more than one component/point
template<typename ValueType>
void Rectangle::writeBinaryGridImpl(const escript::Data& in,
                                    const string& filename, int byteOrder) const
{
    // check function space and determine number of points
    dim_t myN0, myN1;
    dim_t totalN0, totalN1;
    dim_t offset0, offset1;
    if (in.getFunctionSpace().getTypeCode() == Nodes) {
        myN0 = m_NN[0];
        myN1 = m_NN[1];
        totalN0 = m_gNE[0]+1;
        totalN1 = m_gNE[1]+1;
        offset0 = m_offset[0];
        offset1 = m_offset[1];
    } else if (in.getFunctionSpace().getTypeCode() == DegreesOfFreedom ||
                in.getFunctionSpace().getTypeCode() == ReducedDegreesOfFreedom) {
        myN0 = (m_gNE[0]+1)/m_NX[0];
        myN1 = (m_gNE[1]+1)/m_NX[1];
        totalN0 = m_gNE[0]+1;
        totalN1 = m_gNE[1]+1;
        // NOTE(review): non-zero offsets are shifted by one, presumably to
        // skip the overlap row/column shared with the neighbouring rank —
        // confirm against populateSampleIds()
        offset0 = (m_offset[0]>0 ? m_offset[0]+1 : 0);
        offset1 = (m_offset[1]>0 ? m_offset[1]+1 : 0);
    } else if (in.getFunctionSpace().getTypeCode() == Elements ||
                in.getFunctionSpace().getTypeCode() == ReducedElements) {
        myN0 = m_NE[0];
        myN1 = m_NE[1];
        totalN0 = m_gNE[0];
        totalN1 = m_gNE[1];
        offset0 = m_offset[0];
        offset1 = m_offset[1];
    } else
        throw ValueError("writeBinaryGrid(): unsupported function space");

    const int numComp = in.getDataPointSize();
    const int dpp = in.getNumDataPointsPerSample();

    if (numComp > 1 || dpp > 1)
        throw NotImplementedError("writeBinaryGrid(): only scalar, single-value data supported");

    const dim_t fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;

    // from here on we know that each sample consists of one value
    FileWriter fw(m_mpiInfo->comm);
    fw.openFile(filename, fileSize, true);
    MPIBarrier();

    // serialize each local row into a buffer, then write it at the row's
    // global byte offset within the file
    for (index_t y=0; y<myN1; y++) {
        const dim_t fileofs = (offset0+(offset1+y)*totalN0)*sizeof(ValueType);
        std::ostringstream oss;

        for (index_t x=0; x<myN0; x++) {
            const double* sample = in.getSampleDataRO(y*myN0+x);
            ValueType fvalue = static_cast<ValueType>(*sample);
            if (byteOrder == BYTEORDER_NATIVE) {
                oss.write((char*)&fvalue, sizeof(fvalue));
            } else {
                // swap to the requested foreign byte order before writing
                char* value = reinterpret_cast<char*>(&fvalue);
                if (sizeof(fvalue)>4) {
                    byte_swap64(value);
                } else {
                    byte_swap32(value);
                }
                oss.write(value, sizeof(fvalue));
            }
        }
        fw.writeAt(oss, fileofs);
    }
    fw.close();
}
877
878 void Rectangle::write(const std::string& filename) const
879 {
880 throw NotImplementedError("write: not supported");
881 }
882
883 void Rectangle::dump(const string& fileName) const
884 {
885 #ifdef ESYS_HAVE_SILO
886 string fn(fileName);
887 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
888 fn+=".silo";
889 }
890
891 int driver=DB_HDF5;
892 DBfile* dbfile = NULL;
893 const char* blockDirFmt = "/block%04d";
894
895 #ifdef ESYS_MPI
896 PMPIO_baton_t* baton = NULL;
897 const int NUM_SILO_FILES = 1;
898 #endif
899
900 if (m_mpiInfo->size > 1) {
901 #ifdef ESYS_MPI
902 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
903 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
904 PMPIO_DefaultClose, (void*)&driver);
905 // try the fallback driver in case of error
906 if (!baton && driver != DB_PDB) {
907 driver = DB_PDB;
908 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
909 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
910 PMPIO_DefaultClose, (void*)&driver);
911 }
912 if (baton) {
913 char siloPath[64];
914 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
915 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
916 }
917 #endif
918 } else {
919 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
920 getDescription().c_str(), driver);
921 // try the fallback driver in case of error
922 if (!dbfile && driver != DB_PDB) {
923 driver = DB_PDB;
924 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
925 getDescription().c_str(), driver);
926 }
927 char siloPath[64];
928 snprintf(siloPath, 64, blockDirFmt, 0);
929 DBMkDir(dbfile, siloPath);
930 DBSetDir(dbfile, siloPath);
931 }
932
933 if (!dbfile)
934 throw IOError("dump: Could not create Silo file");
935
936 /*
937 if (driver==DB_HDF5) {
938 // gzip level 1 already provides good compression with minimal
939 // performance penalty. Some tests showed that gzip levels >3 performed
940 // rather badly on escript data both in terms of time and space
941 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
942 }
943 */
944
945 const dim_t NN0 = m_NN[0];
946 const dim_t NN1 = m_NN[1];
947 boost::scoped_ptr<double> x(new double[NN0]);
948 boost::scoped_ptr<double> y(new double[NN1]);
949 double* coords[2] = { x.get(), y.get() };
950 #pragma omp parallel
951 {
952 #pragma omp for nowait
953 for (dim_t i0 = 0; i0 < NN0; i0++) {
954 coords[0][i0]=getLocalCoordinate(i0, 0);
955 }
956 #pragma omp for nowait
957 for (dim_t i1 = 0; i1 < NN1; i1++) {
958 coords[1][i1]=getLocalCoordinate(i1, 1);
959 }
960 }
961
962 // casting to int!!
963 vector<int> dims(m_NN, m_NN+2);
964
965 // write mesh
966 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
967 DB_COLLINEAR, NULL);
968
969 // write node ids
970 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
971 NULL, 0, DB_INT, DB_NODECENT, NULL);
972
973 // write element ids
974 dims.assign(m_NE, m_NE+2);
975 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
976 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
977
978 // rank 0 writes multimesh and multivar
979 if (m_mpiInfo->rank == 0) {
980 vector<string> tempstrings;
981 vector<char*> names;
982 for (dim_t i=0; i<m_mpiInfo->size; i++) {
983 std::stringstream path;
984 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/mesh";
985 tempstrings.push_back(path.str());
986 names.push_back((char*)tempstrings.back().c_str());
987 }
988 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
989 DBSetDir(dbfile, "/");
990 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
991 &types[0], NULL);
992 tempstrings.clear();
993 names.clear();
994 for (dim_t i=0; i<m_mpiInfo->size; i++) {
995 std::stringstream path;
996 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/nodeId";
997 tempstrings.push_back(path.str());
998 names.push_back((char*)tempstrings.back().c_str());
999 }
1000 types.assign(m_mpiInfo->size, DB_QUADVAR);
1001 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
1002 &types[0], NULL);
1003 tempstrings.clear();
1004 names.clear();
1005 for (dim_t i=0; i<m_mpiInfo->size; i++) {
1006 std::stringstream path;
1007 path << "/block" << std::setw(4) << std::setfill('0') << std::right << i << "/elementId";
1008 tempstrings.push_back(path.str());
1009 names.push_back((char*)tempstrings.back().c_str());
1010 }
1011 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
1012 &types[0], NULL);
1013 }
1014
1015 if (m_mpiInfo->size > 1) {
1016 #ifdef ESYS_MPI
1017 PMPIO_HandOffBaton(baton, dbfile);
1018 PMPIO_Finish(baton);
1019 #endif
1020 } else {
1021 DBClose(dbfile);
1022 }
1023
1024 #else // ESYS_HAVE_SILO
1025 throw RipleyException("dump: no Silo support");
1026 #endif
1027 }
1028
1029 const dim_t* Rectangle::borrowSampleReferenceIDs(int fsType) const
1030 {
1031 switch (fsType) {
1032 case Nodes:
1033 case ReducedNodes: // FIXME: reduced
1034 return &m_nodeId[0];
1035 case DegreesOfFreedom:
1036 case ReducedDegreesOfFreedom: // FIXME: reduced
1037 return &m_dofId[0];
1038 case Elements:
1039 case ReducedElements:
1040 return &m_elementId[0];
1041 case FaceElements:
1042 case ReducedFaceElements:
1043 return &m_faceId[0];
1044 case Points:
1045 return &m_diracPointNodeIDs[0];
1046 default:
1047 break;
1048 }
1049
1050 std::stringstream msg;
1051 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
1052 throw ValueError(msg.str());
1053 }
1054
// Returns true if the sample with the given id (in function space fsType)
// is owned by this MPI rank, i.e. not a ghost/overlap sample.
bool Rectangle::ownSample(int fsType, index_t id) const
{
    // with a single rank everything is owned locally
    if (getMPISize()==1)
        return true;

    switch (fsType) {
        case Nodes:
        case ReducedNodes: // FIXME: reduced
            // a node is owned iff its DOF index is in the local range
            return (m_dofMap[id] < getNumDOF());
        case DegreesOfFreedom:
        case ReducedDegreesOfFreedom:
            // DOF samples are by construction always local
            return true;
        case Elements:
        case ReducedElements:
            // check ownership of element's bottom left node
            return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
        case FaceElements:
        case ReducedFaceElements:
        {
            // determine which face the sample belongs to before
            // checking ownership of corresponding element's first node
            dim_t n=0;
            // faces are ordered 0=left, 1=right, 2=bottom, 3=top;
            // m_faceCount[i] holds the number of local elements on face i
            for (size_t i=0; i<4; i++) {
                n+=m_faceCount[i];
                if (id<n) {
                    // k = node index of the face's first element's first node
                    index_t k;
                    if (i==1)
                        k=m_NN[0]-2;
                    else if (i==3)
                        k=m_NN[0]*(m_NN[1]-2);
                    else
                        k=0;
                    // determine whether to move right or up
                    const index_t delta=(i/2==0 ? m_NN[0] : 1);
                    // (id-n+m_faceCount[i]) is the element's offset on face i
                    return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
                }
            }
            return false;
        }
        default:
            break;
    }

    std::stringstream msg;
    msg << "ownSample: invalid function space type " << fsType;
    throw ValueError(msg.str());
}
1102
1103 RankVector Rectangle::getOwnerVector(int fsType) const
1104 {
1105 RankVector owner;
1106 const int rank = m_mpiInfo->rank;
1107
1108 if (fsType == Elements || fsType == ReducedElements) {
1109 owner.assign(getNumElements(), rank);
1110 if (m_faceCount[0] == 0) {
1111 owner[0]=(m_faceCount[2]==0 ? rank-m_NX[0]-1 : rank-1);
1112 for (dim_t i=1; i<m_NE[1]; i++)
1113 owner[i*m_NE[0]] = rank-1;
1114 }
1115 if (m_faceCount[2]==0) {
1116 const int first=(m_faceCount[0]==0 ? 1 : 0);
1117 for (dim_t i=first; i<m_NE[0]; i++)
1118 owner[i] = rank-m_NX[0];
1119 }
1120
1121 } else if (fsType == FaceElements || fsType == ReducedFaceElements) {
1122 owner.assign(getNumFaceElements(), rank);
1123 if (m_faceCount[0] == 0) {
1124 if (m_faceCount[2] > 0)
1125 owner[m_faceCount[1]] = rank-1;
1126 if (m_faceCount[3] > 0)
1127 owner[m_faceCount[1]+m_faceCount[2]] = rank-1;
1128 }
1129 if (m_faceCount[2] == 0) {
1130 if (m_faceCount[0] > 0)
1131 owner[0] = rank-m_NX[0];
1132 if (m_faceCount[1] > 0)
1133 owner[m_faceCount[0]] = rank-m_NX[0];
1134 }
1135
1136 } else {
1137 throw ValueError("getOwnerVector: only valid for element types");
1138 }
1139
1140 return owner;
1141 }
1142
// Fills 'out' with the outward unit normal of each face element.
// Faces are numbered 0=left, 1=right, 2=bottom, 3=top; on this
// axis-aligned grid the normals are the constant vectors
// (-1,0), (1,0), (0,-1) and (0,1) respectively.
void Rectangle::setToNormal(escript::Data& out) const
{
    const dim_t NE0=m_NE[0];
    const dim_t NE1=m_NE[1];
    if (out.getFunctionSpace().getTypeCode() == FaceElements) {
        out.requireWrite();
#pragma omp parallel
        {
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
                    // set vector at two quadrature points
                    *o++ = -1.;
                    *o++ = 0.;
                    *o++ = -1.;
                    *o = 0.;
                }
            }

            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
                    // set vector at two quadrature points
                    *o++ = 1.;
                    *o++ = 0.;
                    *o++ = 1.;
                    *o = 0.;
                }
            }

            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
                    // set vector at two quadrature points
                    *o++ = 0.;
                    *o++ = -1.;
                    *o++ = 0.;
                    *o = -1.;
                }
            }

            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
                    // set vector at two quadrature points
                    *o++ = 0.;
                    *o++ = 1.;
                    *o++ = 0.;
                    *o = 1.;
                }
            }
        } // end of parallel section
    } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
        out.requireWrite();
#pragma omp parallel
        {
            // reduced function space: only one quadrature point per face
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
                    *o++ = -1.;
                    *o = 0.;
                }
            }

            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
                    *o++ = 1.;
                    *o = 0.;
                }
            }

            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
                    *o++ = 0.;
                    *o = -1.;
                }
            }

            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
                    *o++ = 0.;
                    *o = 1.;
                }
            }
        } // end of parallel section

    } else {
        std::stringstream msg;
        msg << "setToNormal: invalid function space type "
            << out.getFunctionSpace().getTypeCode();
        throw ValueError(msg.str());
    }
}
1247
1248 void Rectangle::setToSize(escript::Data& out) const
1249 {
1250 if (out.getFunctionSpace().getTypeCode() == Elements
1251 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
1252 out.requireWrite();
1253 const dim_t numQuad = out.getNumDataPointsPerSample();
1254 const dim_t numElements = getNumElements();
1255 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
1256 #pragma omp parallel for
1257 for (index_t k = 0; k < numElements; ++k) {
1258 double* o = out.getSampleDataRW(k);
1259 fill(o, o+numQuad, size);
1260 }
1261 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
1262 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1263 out.requireWrite();
1264 const dim_t numQuad=out.getNumDataPointsPerSample();
1265 const dim_t NE0 = m_NE[0];
1266 const dim_t NE1 = m_NE[1];
1267 #pragma omp parallel
1268 {
1269 if (m_faceOffset[0] > -1) {
1270 #pragma omp for nowait
1271 for (index_t k1 = 0; k1 < NE1; ++k1) {
1272 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1273 fill(o, o+numQuad, m_dx[1]);
1274 }
1275 }
1276
1277 if (m_faceOffset[1] > -1) {
1278 #pragma omp for nowait
1279 for (index_t k1 = 0; k1 < NE1; ++k1) {
1280 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1281 fill(o, o+numQuad, m_dx[1]);
1282 }
1283 }
1284
1285 if (m_faceOffset[2] > -1) {
1286 #pragma omp for nowait
1287 for (index_t k0 = 0; k0 < NE0; ++k0) {
1288 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1289 fill(o, o+numQuad, m_dx[0]);
1290 }
1291 }
1292
1293 if (m_faceOffset[3] > -1) {
1294 #pragma omp for nowait
1295 for (index_t k0 = 0; k0 < NE0; ++k0) {
1296 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1297 fill(o, o+numQuad, m_dx[0]);
1298 }
1299 }
1300 } // end of parallel section
1301
1302 } else {
1303 std::stringstream msg;
1304 msg << "setToSize: invalid function space type "
1305 << out.getFunctionSpace().getTypeCode();
1306 throw ValueError(msg.str());
1307 }
1308 }
1309
1310 void Rectangle::Print_Mesh_Info(const bool full) const
1311 {
1312 RipleyDomain::Print_Mesh_Info(full);
1313 if (full) {
1314 std::cout << " Id Coordinates" << std::endl;
1315 std::cout.precision(15);
1316 std::cout.setf(ios::scientific, ios::floatfield);
1317 for (index_t i=0; i < getNumNodes(); i++) {
1318 std::cout << " " << std::setw(5) << m_nodeId[i]
1319 << " " << getLocalCoordinate(i%m_NN[0], 0)
1320 << " " << getLocalCoordinate(i/m_NN[0], 1) << std::endl;
1321 }
1322 }
1323 }
1324
1325
1326 //protected
1327 void Rectangle::assembleCoordinates(escript::Data& arg) const
1328 {
1329 int numDim = m_numDim;
1330 if (!arg.isDataPointShapeEqual(1, &numDim))
1331 throw ValueError("setToX: Invalid Data object shape");
1332 if (!arg.numSamplesEqual(1, getNumNodes()))
1333 throw ValueError("setToX: Illegal number of samples in Data object");
1334
1335 const dim_t NN0 = m_NN[0];
1336 const dim_t NN1 = m_NN[1];
1337 arg.requireWrite();
1338 #pragma omp parallel for
1339 for (dim_t i1 = 0; i1 < NN1; i1++) {
1340 for (dim_t i0 = 0; i0 < NN0; i0++) {
1341 double* point = arg.getSampleDataRW(i0+NN0*i1);
1342 point[0] = getLocalCoordinate(i0, 0);
1343 point[1] = getLocalCoordinate(i1, 1);
1344 }
1345 }
1346 }
1347
1348 //protected
1349 void Rectangle::assembleGradient(escript::Data& out,
1350 const escript::Data& in) const
1351 {
1352 if (out.isComplex() && in.isComplex())
1353 assembleGradientImpl<cplx_t>(out, in);
1354 else if (!out.isComplex() && !in.isComplex())
1355 assembleGradientImpl<real_t>(out, in);
1356 else
1357 throw ValueError("Gradient: input & output complexity must match.");
1358 }
1359
//protected
// Computes the spatial gradient of the nodal data 'in' at the quadrature
// points of 'out' for the Elements, ReducedElements, FaceElements and
// ReducedFaceElements function spaces.
// The magic constants are (1 -/+ 1/sqrt(3))/2, i.e.
//   .21132486540518711775 = (1 - 1/sqrt(3))/2
//   .78867513459481288225 = (1 + 1/sqrt(3))/2
// divided by the element edge length; presumably these are the bilinear
// shape-function derivative factors at the 2x2 Gauss points.
template<typename Scalar>
void Rectangle::assembleGradientImpl(escript::Data& out,
                                     const escript::Data& in) const
{
    const dim_t numComp = in.getDataPointSize();
    const dim_t NE0 = m_NE[0];
    const dim_t NE1 = m_NE[1];
    const double cx0 = .21132486540518711775/m_dx[0];
    const double cx1 = .78867513459481288225/m_dx[0];
    const double cx2 = 1./m_dx[0];
    const double cy0 = .21132486540518711775/m_dx[1];
    const double cy1 = .78867513459481288225/m_dx[1];
    const double cy2 = 1./m_dx[1];
    const Scalar zero = static_cast<Scalar>(0);

    if (out.getFunctionSpace().getTypeCode() == Elements) {
        out.requireWrite();
#pragma omp parallel
        {
            // f_ab holds the data at element corner (a,b):
            // first index = x direction, second = y direction
            vector<Scalar> f_00(numComp, zero);
            vector<Scalar> f_01(numComp, zero);
            vector<Scalar> f_10(numComp, zero);
            vector<Scalar> f_11(numComp, zero);
#pragma omp for
            for (index_t k1 = 0; k1 < NE1; ++k1) {
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]), zero);
                    // write d/dx and d/dy at each of the 4 quadrature points
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
                        o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
                        o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
                        o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
                        o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
                        o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
                        o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
                    } // end of component loop i
                } // end of k0 loop
            } // end of k1 loop
        } // end of parallel section
    } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
        out.requireWrite();
#pragma omp parallel
        {
            vector<Scalar> f_00(numComp, zero);
            vector<Scalar> f_01(numComp, zero);
            vector<Scalar> f_10(numComp, zero);
            vector<Scalar> f_11(numComp, zero);
#pragma omp for
            for (index_t k1 = 0; k1 < NE1; ++k1) {
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]), zero);
                    // single quadrature point: average the corner differences
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2 * 0.5;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2 * 0.5;
                    } // end of component loop i
                } // end of k0 loop
            } // end of k1 loop
        } // end of parallel section
    } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
        out.requireWrite();
#pragma omp parallel
        {
            vector<Scalar> f_00(numComp, zero);
            vector<Scalar> f_01(numComp, zero);
            vector<Scalar> f_10(numComp, zero);
            vector<Scalar> f_11(numComp, zero);
            // face 0 = left edge: elements (0,k1)
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[0]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
                        o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
                        o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
                    } // end of component loop i
                } // end of k1 loop
            } // end of face 0
            // face 1 = right edge: elements (m_NN[0]-2,k1)
            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[1]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
                        o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
                        o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
                        o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
                    } // end of component loop i
                } // end of k1 loop
            } // end of face 1
            // face 2 = bottom edge: elements (k0,0)
            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[2]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
                        o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
                        o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
                    } // end of component loop i
                } // end of k0 loop
            } // end of face 2
            // face 3 = top edge: elements (k0,m_NN[1]-2)
            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[3]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
                        o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
                        o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
                    } // end of component loop i
                } // end of k0 loop
            } // end of face 3
        } // end of parallel section

    } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
        out.requireWrite();
#pragma omp parallel
        {
            vector<Scalar> f_00(numComp, zero);
            vector<Scalar> f_01(numComp, zero);
            vector<Scalar> f_10(numComp, zero);
            vector<Scalar> f_11(numComp, zero);
            // same faces as above but with one quadrature point per face
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[0]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2 * 0.5;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
                    } // end of component loop i
                } // end of k1 loop
            } // end of face 0
            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = 0; k1 < NE1; ++k1) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[1]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2 * 0.5;
                        o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
                    } // end of component loop i
                } // end of k1 loop
            } // end of face 1
            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[2]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2 * 0.5;
                    } // end of component loop i
                } // end of k0 loop
            } // end of face 2
            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = 0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0]), zero), numComp*sizeof(Scalar));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0]), zero), numComp*sizeof(Scalar));
                    Scalar* o = out.getSampleDataRW(m_faceOffset[3]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
                        o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2 * 0.5;
                    } // end of component loop i
                } // end of k0 loop
            } // end of face 3
        } // end of parallel section
    }
}
1569
// explicit instantiation of the two supported scalar versions
// (real and complex) of assembleGradientImpl
template
void Rectangle::assembleGradientImpl<real_t>(escript::Data& out,
                                             const escript::Data& in) const;

template
void Rectangle::assembleGradientImpl<cplx_t>(escript::Data& out,
                                             const escript::Data& in) const;
1578
//protected
// real-valued entry point; forwards to the templated implementation
void Rectangle::assembleIntegrate(vector<real_t>& integrals,
                                  const escript::Data& arg) const
{
    assembleIntegrateImpl<real_t>(integrals, arg);
}
1585
//protected
// complex-valued entry point; forwards to the templated implementation
void Rectangle::assembleIntegrate(vector<cplx_t>& integrals,
                                  const escript::Data& arg) const
{
    assembleIntegrateImpl<cplx_t>(integrals, arg);
}
1592
//private
// Accumulates into 'integrals' (one entry per data point component) the
// integral of 'arg' over the elements owned by this rank. Supports
// (reduced) elements and (reduced) face elements, expanded or not.
template<typename Scalar>
void Rectangle::assembleIntegrateImpl(vector<Scalar>& integrals,
                                      const escript::Data& arg) const
{
    const dim_t numComp = arg.getDataPointSize();
    // index of the first owned element in each axis (skips overlap)
    const index_t left = getFirstInDim(0);
    const index_t bottom = getFirstInDim(1);
    const int fs = arg.getFunctionSpace().getTypeCode();
    const Scalar zero = static_cast<Scalar>(0);

    if (fs == Elements && arg.actsExpanded()) {
#pragma omp parallel
        {
            vector<Scalar> int_local(numComp, zero);
            // weight per quadrature point (4 points per element)
            const real_t w = m_dx[0]*m_dx[1]/4.;
#pragma omp for nowait
            for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]), zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        const Scalar f0 = f[INDEX2(i,0,numComp)];
                        const Scalar f1 = f[INDEX2(i,1,numComp)];
                        const Scalar f2 = f[INDEX2(i,2,numComp)];
                        const Scalar f3 = f[INDEX2(i,3,numComp)];
                        int_local[i] += (f0+f1+f2+f3)*w;
                    } // end of component loop i
                } // end of k0 loop
            } // end of k1 loop
            // merge per-thread partial sums
#pragma omp critical
            for (index_t i=0; i<numComp; i++)
                integrals[i] += int_local[i];
        } // end of parallel section

    } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
        // constant data per element: weight is the full element area
        const real_t w = m_dx[0]*m_dx[1];
#pragma omp parallel
        {
            vector<Scalar> int_local(numComp, 0);
#pragma omp for nowait
            for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]), zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        int_local[i] += f[i]*w;
                    }
                }
            }
#pragma omp critical
            for (index_t i = 0; i < numComp; i++)
                integrals[i] += int_local[i];
        } // end of parallel section

    } else if (fs == FaceElements && arg.actsExpanded()) {
#pragma omp parallel
        {
            vector<Scalar> int_local(numComp, zero);
            // weights per quadrature point (2 points per face element);
            // w1 applies to left/right faces, w0 to bottom/top faces
            const real_t w0 = m_dx[0]/2.;
            const real_t w1 = m_dx[1]/2.;
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[0]+k1, zero);
                    for (index_t i=0; i < numComp; ++i) {
                        const Scalar f0 = f[INDEX2(i,0,numComp)];
                        const Scalar f1 = f[INDEX2(i,1,numComp)];
                        int_local[i] += (f0+f1)*w1;
                    } // end of component loop i
                } // end of k1 loop
            }

            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[1]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        const Scalar f0 = f[INDEX2(i,0,numComp)];
                        const Scalar f1 = f[INDEX2(i,1,numComp)];
                        int_local[i] += (f0+f1)*w1;
                    } // end of component loop i
                } // end of k1 loop
            }

            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[2]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        const Scalar f0 = f[INDEX2(i,0,numComp)];
                        const Scalar f1 = f[INDEX2(i,1,numComp)];
                        int_local[i] += (f0+f1)*w0;
                    } // end of component loop i
                } // end of k0 loop
            }

            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[3]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        const Scalar f0 = f[INDEX2(i,0,numComp)];
                        const Scalar f1 = f[INDEX2(i,1,numComp)];
                        int_local[i] += (f0+f1)*w0;
                    } // end of component loop i
                } // end of k0 loop
            }
#pragma omp critical
            for (index_t i = 0; i < numComp; i++)
                integrals[i] += int_local[i];
        } // end of parallel section

    } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
#pragma omp parallel
        {
            vector<Scalar> int_local(numComp, 0);
            // constant data per face element: weight is the edge length
            if (m_faceOffset[0] > -1) {
#pragma omp for nowait
                for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[0]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        int_local[i] += f[i]*m_dx[1];
                    }
                }
            }

            if (m_faceOffset[1] > -1) {
#pragma omp for nowait
                for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[1]+k1, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        int_local[i] += f[i]*m_dx[1];
                    }
                }
            }

            if (m_faceOffset[2] > -1) {
#pragma omp for nowait
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[2]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        int_local[i] += f[i]*m_dx[0];
                    }
                }
            }

            if (m_faceOffset[3] > -1) {
#pragma omp for nowait
                for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
                    const Scalar* f = arg.getSampleDataRO(m_faceOffset[3]+k0, zero);
                    for (index_t i = 0; i < numComp; ++i) {
                        int_local[i] += f[i]*m_dx[0];
                    }
                }
            }

#pragma omp critical
            for (index_t i = 0; i < numComp; i++)
                integrals[i] += int_local[i];
        } // end of parallel section
    } // function space selector
}
1754
1755 //protected
1756 IndexVector Rectangle::getDiagonalIndices(bool upperOnly) const
1757 {
1758 IndexVector ret;
1759 // only store non-negative indices if requested
1760 if (upperOnly)
1761 ret.resize(5);
1762 else
1763 ret.resize(9);
1764 const dim_t nDOF0 = getNumDOFInAxis(0);
1765 size_t idx = 0;
1766 for (int i1=-1; i1<2; i1++) {
1767 for (int i0=-1; i0<2; i0++) {
1768 const int index = i1*nDOF0 + i0;
1769 if (!upperOnly || index >= 0)
1770 ret[idx++] = index;
1771 }
1772 }
1773
1774 return ret;
1775 }
1776
1777 //protected
1778 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1779 {
1780 const dim_t numComp = in.getDataPointSize();
1781 out.requireWrite();
1782
1783 const index_t left = getFirstInDim(0);
1784 const index_t bottom = getFirstInDim(1);
1785 const dim_t nDOF0 = getNumDOFInAxis(0);
1786 const dim_t nDOF1 = getNumDOFInAxis(1);
1787 #pragma omp parallel for
1788 for (index_t i=0; i<nDOF1; i++) {
1789 for (index_t j=0; j<nDOF0; j++) {
1790 const index_t n=j+left+(i+bottom)*m_NN[0];
1791 const double* src=in.getSampleDataRO(n);
1792 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1793 }
1794 }
1795 }
1796
#ifdef ESYS_HAVE_TRILINOS
//protected
// Returns the (cached) Trilinos matrix graph for this domain, creating
// it from the DOF and node IDs on first request.
esys_trilinos::const_TrilinosGraph_ptr Rectangle::getTrilinosGraph() const
{
    if (m_graph.is_null()) {
        m_graph = createTrilinosGraph(m_dofId, m_nodeId);
    }
    return m_graph;
}
#endif
1807
#ifdef ESYS_HAVE_PASO
//protected
// Returns the (cached) Paso system matrix pattern for this domain.
// NOTE: reducedRowOrder/reducedColOrder are accepted for interface
// compatibility but not used by this implementation.
paso::SystemMatrixPattern_ptr Rectangle::getPasoMatrixPattern(
                                                    bool reducedRowOrder,
                                                    bool reducedColOrder) const
{
    if (m_pattern.get())
        return m_pattern;

    // first call - create pattern, then return
    paso::Connector_ptr conn(getPasoConnector());
    const dim_t numDOF = getNumDOF();
    const dim_t numShared = conn->send->numSharedComponents;
    const dim_t numNeighbours = conn->send->neighbour.size();
    const std::vector<index_t>& offsetInShared(conn->send->offsetInShared);
    const index_t* sendShared = conn->send->shared;

    // these are for the couple blocks
    vector<IndexVector> colIndices(numDOF);
    vector<IndexVector> rowIndices(numShared);

    // link every shared component with itself and its immediate
    // predecessor/successor in the per-neighbour shared list
    for (dim_t i=0; i<numNeighbours; i++) {
        const dim_t start = offsetInShared[i];
        const dim_t end = offsetInShared[i+1];
        for (dim_t j = start; j < end; j++) {
            if (j > start)
                doublyLink(colIndices, rowIndices, sendShared[j-1], j);
            doublyLink(colIndices, rowIndices, sendShared[j], j);
            if (j < end-1)
                doublyLink(colIndices, rowIndices, sendShared[j+1], j);
        }
    }
#pragma omp parallel for
    for (dim_t i = 0; i < numShared; i++) {
        sort(rowIndices[i].begin(), rowIndices[i].end());
    }

    // create main and couple blocks
    paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);
    paso::Pattern_ptr colPattern = createPasoPattern(colIndices, numShared);
    paso::Pattern_ptr rowPattern = createPasoPattern(rowIndices, numDOF);

    // allocate paso distribution
    escript::Distribution_ptr distribution(new escript::Distribution(
                                               m_mpiInfo, m_nodeDistribution));

    // finally create the system matrix pattern
    m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
            distribution, distribution, mainPattern, colPattern, rowPattern,
            conn, conn));
    return m_pattern;
}
#endif // ESYS_HAVE_PASO
1861
//private
/// Assigns global ids to nodes, degrees of freedom, elements and face
/// elements, fills the face element counts, offsets and tags, and finally
/// builds the node->DOF mapping via populateDofMap().
void Rectangle::populateSampleIds()
{
    // degrees of freedom are numbered from left to right, bottom to top in
    // each rank, continuing on the next rank (ranks also go left-right,
    // bottom-top).
    // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
    // helps when writing out data rank after rank.

    // build node distribution vector first.
    // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
    // constant for all ranks in this implementation
    m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
    const dim_t numDOF=getNumDOF();
    for (dim_t k=1; k<m_mpiInfo->size; k++) {
        m_nodeDistribution[k]=k*numDOF;
    }
    m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
    try {
        m_nodeId.resize(getNumNodes());
        m_dofId.resize(numDOF);
        m_elementId.resize(getNumElements());
    } catch (const std::length_error& le) {
        throw RipleyException("The system does not have sufficient memory for a domain of this size.");
    }

    // populate face element counts. A count of 0 means the face borders a
    // neighbouring rank rather than the domain boundary.
    //left
    if (m_offset[0]==0)
        m_faceCount[0]=m_NE[1];
    else
        m_faceCount[0]=0;
    //right
    if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
        m_faceCount[1]=m_NE[1];
    else
        m_faceCount[1]=0;
    //bottom
    if (m_offset[1]==0)
        m_faceCount[2]=m_NE[0];
    else
        m_faceCount[2]=0;
    //top
    if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
        m_faceCount[3]=m_NE[0];
    else
        m_faceCount[3]=0;

    const dim_t NFE = getNumFaceElements();
    m_faceId.resize(NFE);

    const index_t left = getFirstInDim(0);
    const index_t bottom = getFirstInDim(1);
    const dim_t nDOF0 = getNumDOFInAxis(0);
    const dim_t nDOF1 = getNumDOFInAxis(1);
    const dim_t NE0 = m_NE[0];
    const dim_t NE1 = m_NE[1];

    // maps a local node position (x,y) to its global id by locating the
    // owning rank's DOF block first and then the offset within that block
#define globalNodeId(x,y) \
    ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
    + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0

    // set corner id's outside the parallel region
    m_nodeId[0] = globalNodeId(0, 0);
    m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
    m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
    m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
#undef globalNodeId

#pragma omp parallel
    {
        // populate degrees of freedom and own nodes (identical id)
#pragma omp for nowait
        for (dim_t i=0; i<nDOF1; i++) {
            for (dim_t j=0; j<nDOF0; j++) {
                const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
                const index_t dofIdx=j+i*nDOF0;
                m_dofId[dofIdx] = m_nodeId[nodeIdx]
                    = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
            }
        }

        // populate the rest of the nodes (shared with other ranks).
        // Each edge is owned by the respective neighbouring rank, so the
        // node ids come from that rank's DOF range.
        if (m_faceCount[0]==0) { // left column
#pragma omp for nowait
            for (dim_t i=0; i<nDOF1; i++) {
                const index_t nodeIdx=(i+bottom)*m_NN[0];
                const index_t dofId=(i+1)*nDOF0-1;
                m_nodeId[nodeIdx]
                    = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
            }
        }
        if (m_faceCount[1]==0) { // right column
#pragma omp for nowait
            for (dim_t i=0; i<nDOF1; i++) {
                const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
                const index_t dofId=i*nDOF0;
                m_nodeId[nodeIdx]
                    = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
            }
        }
        if (m_faceCount[2]==0) { // bottom row
#pragma omp for nowait
            for (dim_t i=0; i<nDOF0; i++) {
                const index_t nodeIdx=i+left;
                const index_t dofId=nDOF0*(nDOF1-1)+i;
                m_nodeId[nodeIdx]
                    = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
            }
        }
        if (m_faceCount[3]==0) { // top row
#pragma omp for nowait
            for (dim_t i=0; i<nDOF0; i++) {
                const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
                const index_t dofId=i;
                m_nodeId[nodeIdx]
                    = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
            }
        }

        // populate element id's (globally numbered row by row)
#pragma omp for nowait
        for (dim_t i1=0; i1<NE1; i1++) {
            for (dim_t i0=0; i0<NE0; i0++) {
                m_elementId[i0+i1*NE0]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
            }
        }

        // face elements simply get local sequential ids
#pragma omp for
        for (dim_t k=0; k<NFE; k++)
            m_faceId[k]=k;
    } // end parallel section

    m_nodeTags.assign(getNumNodes(), 0);
    updateTagsInUse(Nodes);

    m_elementTags.assign(getNumElements(), 0);
    updateTagsInUse(Elements);

    // generate face offset vector and set face tags
    const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
    const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
    m_faceOffset.assign(4, -1);
    m_faceTags.clear();
    index_t offset=0;
    for (size_t i=0; i<4; i++) {
        if (m_faceCount[i]>0) {
            m_faceOffset[i]=offset;
            offset+=m_faceCount[i];
            m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
        }
    }
    setTagMap("left", LEFT);
    setTagMap("right", RIGHT);
    setTagMap("bottom", BOTTOM);
    setTagMap("top", TOP);
    updateTagsInUse(FaceElements);

    populateDofMap();
}
2023
2024 //private
2025 vector<IndexVector> Rectangle::getConnections(bool includeShared) const
2026 {
2027 // returns a vector v of size numDOF where v[i] is a vector with indices
2028 // of DOFs connected to i (up to 9 in 2D).
2029 // In other words this method returns the occupied (local) matrix columns
2030 // for all (local) matrix rows.
2031 // If includeShared==true then connections to non-owned DOFs are also
2032 // returned (i.e. indices of the column couplings)
2033 const dim_t nDOF0 = getNumDOFInAxis(0);
2034 const dim_t nDOF1 = getNumDOFInAxis(1);
2035 const dim_t numMatrixRows = nDOF0*nDOF1;
2036 vector<IndexVector> indices(numMatrixRows);
2037
2038 if (includeShared) {
2039 const index_t left = getFirstInDim(0);
2040 const index_t bottom = getFirstInDim(1);
2041 const dim_t NN0 = m_NN[0];
2042 const dim_t NN1 = m_NN[1];
2043 #pragma omp parallel for
2044 for (index_t i=0; i < numMatrixRows; i++) {
2045 const index_t x = left + i % nDOF0;
2046 const index_t y = bottom + i / nDOF0;
2047 // loop through potential neighbours and add to index if positions
2048 // are within bounds
2049 for (dim_t i1=y-1; i1<y+2; i1++) {
2050 for (dim_t i0=x-1; i0<x+2; i0++) {
2051 if (i0>=0 && i1>=0 && i0<NN0 && i1<NN1) {
2052 indices[i].push_back(m_dofMap[i1*NN0 + i0]);
2053 }
2054 }
2055 }
2056 sort(indices[i].begin(), indices[i].end());
2057 }
2058 } else {
2059 #pragma omp parallel for
2060 for (index_t i=0; i < numMatrixRows; i++) {
2061 const index_t x = i % nDOF0;
2062 const index_t y = i / nDOF0;
2063 // loop through potential neighbours and add to index if positions
2064 // are within bounds
2065 for (dim_t i1=y-1; i1<y+2; i1++) {
2066 for (dim_t i0=x-1; i0<x+2; i0++) {
2067 if (i0>=0 && i1>=0 && i0<nDOF0 && i1<nDOF1) {
2068 indices[i].push_back(i1*nDOF0 + i0);
2069 }
2070 }
2071 }
2072 }
2073 }
2074 return indices;
2075 }
2076
//private
/// Builds the node->DOF mapping (m_dofMap). Owned nodes map to the local
/// DOF index; nodes on inter-rank boundaries map to ghost indices
/// numDOF, numDOF+1, ... in the order in which the shared components are
/// discovered below. The same traversal also builds the neighbour rank
/// list and the send/receive shared-component lists used to create the
/// Paso connector.
void Rectangle::populateDofMap()
{
    const dim_t nDOF0 = getNumDOFInAxis(0);
    const dim_t nDOF1 = getNumDOFInAxis(1);
    const index_t left = getFirstInDim(0);
    const index_t bottom = getFirstInDim(1);

    // populate node->DOF mapping with own degrees of freedom.
    // The rest is assigned in the loop further down
    m_dofMap.assign(getNumNodes(), 0);
#pragma omp parallel for
    for (index_t i=bottom; i<bottom+nDOF1; i++) {
        for (index_t j=left; j<left+nDOF0; j++) {
            m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
        }
    }

    // build list of shared components and neighbours by looping through
    // all potential neighbouring ranks and checking if positions are
    // within bounds
    const dim_t numDOF=nDOF0*nDOF1;
    RankVector neighbour;
    IndexVector offsetInShared(1,0);
    IndexVector sendShared, recvShared;
    // this rank's position in the rank grid
    const int x=m_mpiInfo->rank%m_NX[0];
    const int y=m_mpiInfo->rank/m_NX[0];
    // numShared will contain the number of shared DOFs after the following
    // blocks
    dim_t numShared=0;
    // NOTE: the order of the eight edge/corner blocks below is significant -
    // it determines the ghost DOF numbering and must match the receiving
    // side on the neighbouring ranks.
    // sharing bottom edge
    if (y > 0) {
        neighbour.push_back((y-1)*m_NX[0] + x);
        const dim_t num = nDOF0;
        offsetInShared.push_back(offsetInShared.back()+num);
        for (dim_t i=0; i<num; i++, numShared++) {
            sendShared.push_back(i);
            recvShared.push_back(numDOF+numShared);
            m_dofMap[left+i]=numDOF+numShared;
        }
    }
    // sharing top edge
    if (y < m_NX[1] - 1) {
        neighbour.push_back((y+1)*m_NX[0] + x);
        const dim_t num = nDOF0;
        offsetInShared.push_back(offsetInShared.back()+num);
        for (dim_t i=0; i<num; i++, numShared++) {
            sendShared.push_back(numDOF-num+i);
            recvShared.push_back(numDOF+numShared);
            m_dofMap[m_NN[0]*(m_NN[1]-1)+left+i]=numDOF+numShared;
        }
    }
    // sharing left edge
    if (x > 0) {
        neighbour.push_back(y*m_NX[0] + x-1);
        const dim_t num = nDOF1;
        offsetInShared.push_back(offsetInShared.back()+num);
        for (dim_t i=0; i<num; i++, numShared++) {
            sendShared.push_back(i*nDOF0);
            recvShared.push_back(numDOF+numShared);
            m_dofMap[(bottom+i)*m_NN[0]]=numDOF+numShared;
        }
    }
    // sharing right edge
    if (x < m_NX[0] - 1) {
        neighbour.push_back(y*m_NX[0] + x+1);
        const dim_t num = nDOF1;
        offsetInShared.push_back(offsetInShared.back()+num);
        for (dim_t i=0; i<num; i++, numShared++) {
            sendShared.push_back((i+1)*nDOF0-1);
            recvShared.push_back(numDOF+numShared);
            m_dofMap[(bottom+1+i)*m_NN[0]-1]=numDOF+numShared;
        }
    }
    // sharing bottom-left node
    if (x > 0 && y > 0) {
        neighbour.push_back((y-1)*m_NX[0] + x-1);
        // sharing a node
        offsetInShared.push_back(offsetInShared.back()+1);
        sendShared.push_back(0);
        recvShared.push_back(numDOF+numShared);
        m_dofMap[0]=numDOF+numShared;
        ++numShared;
    }
    // sharing top-left node
    if (x > 0 && y < m_NX[1]-1) {
        neighbour.push_back((y+1)*m_NX[0] + x-1);
        offsetInShared.push_back(offsetInShared.back()+1);
        sendShared.push_back(numDOF-nDOF0);
        recvShared.push_back(numDOF+numShared);
        m_dofMap[m_NN[0]*(m_NN[1]-1)]=numDOF+numShared;
        ++numShared;
    }
    // sharing bottom-right node
    if (x < m_NX[0]-1 && y > 0) {
        neighbour.push_back((y-1)*m_NX[0] + x+1);
        offsetInShared.push_back(offsetInShared.back()+1);
        sendShared.push_back(nDOF0-1);
        recvShared.push_back(numDOF+numShared);
        m_dofMap[m_NN[0]-1]=numDOF+numShared;
        ++numShared;
    }
    // sharing top-right node
    if (x < m_NX[0]-1 && y < m_NX[1]-1) {
        neighbour.push_back((y+1)*m_NX[0] + x+1);
        offsetInShared.push_back(offsetInShared.back()+1);
        sendShared.push_back(numDOF-1);
        recvShared.push_back(numDOF+numShared);
        m_dofMap[m_NN[0]*m_NN[1]-1]=numDOF+numShared;
        ++numShared;
    }

#ifdef ESYS_HAVE_PASO
    createPasoConnector(neighbour, offsetInShared, offsetInShared, sendShared,
                        recvShared);
#endif

    // useful debug output
    /*
    std::cout << "--- rcv_shcomp ---" << std::endl;
    std::cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << std::endl;
    for (size_t i=0; i<neighbour.size(); i++) {
        std::cout << "neighbor[" << i << "]=" << neighbour[i]
            << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << std::endl;
    }
    for (size_t i=0; i<recvShared.size(); i++) {
        std::cout << "shared[" << i << "]=" << recvShared[i] << std::endl;
    }
    std::cout << "--- snd_shcomp ---" << std::endl;
    for (size_t i=0; i<sendShared.size(); i++) {
        std::cout << "shared[" << i << "]=" << sendShared[i] << std::endl;
    }
    std::cout << "--- dofMap ---" << std::endl;
    for (size_t i=0; i<m_dofMap.size(); i++) {
        std::cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << std::endl;
    }
    */
}
2215
2216 //private
2217 template<typename Scalar>
2218 void Rectangle::addToMatrixAndRHS(AbstractSystemMatrix* S, escript::Data& F,
2219 const vector<Scalar>& EM_S, const vector<Scalar>& EM_F, bool addS,
2220 bool addF, index_t firstNode, int nEq, int nComp) const
2221 {
2222 IndexVector rowIndex(4);
2223 rowIndex[0] = m_dofMap[firstNode];
2224 rowIndex[1] = m_dofMap[firstNode+1];
2225 rowIndex[2] = m_dofMap[firstNode+m_NN[0]];
2226 rowIndex[3] = m_dofMap[firstNode+m_NN[0]+1];
2227 if (addF) {
2228 Scalar* F_p = F.getSampleDataRW(0, static_cast<Scalar>(0));
2229 for (index_t i=0; i<rowIndex.size(); i++) {
2230 if (rowIndex[i]<getNumDOF()) {
2231 for (int eq=0; eq<nEq; eq++) {
2232 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
2233 }
2234 }
2235 }
2236 }
2237 if (addS) {
2238 addToSystemMatrix<Scalar>(S, rowIndex, nEq, EM_S);
2239 }
2240 }
2241
// explicit instantiations of addToMatrixAndRHS for the two scalar types
// used by the assemblers (real and complex)
template
void Rectangle::addToMatrixAndRHS<real_t>(AbstractSystemMatrix* S, escript::Data& F,
         const vector<real_t>& EM_S, const vector<real_t>& EM_F, bool addS,
         bool addF, index_t firstNode, int nEq, int nComp) const;

template
void Rectangle::addToMatrixAndRHS<cplx_t>(AbstractSystemMatrix* S, escript::Data& F,
         const vector<cplx_t>& EM_S, const vector<cplx_t>& EM_F, bool addS,
         bool addF, index_t firstNode, int nEq, int nComp) const;
2251
2252 //protected
2253 void Rectangle::interpolateNodesOnElements(escript::Data& out,
2254 const escript::Data& in,
2255 bool reduced) const
2256 {
2257 if (in.isComplex()!=out.isComplex())
2258 {
2259 throw RipleyException("Programmer Error: in and out parameters do not have the same complexity.");
2260 }
2261 if (in.isComplex())
2262 {
2263 interpolateNodesOnElementsWorker(out, in, reduced, escript::DataTypes::cplx_t(0));
2264 }
2265 else
2266 {
2267 interpolateNodesOnElementsWorker(out, in, reduced, escript::DataTypes::real_t(0));
2268 }
2269 }
2270
2271 //protected
2272 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
2273 const escript::Data& in,
2274 bool reduced) const
2275 {
2276 if (in.isComplex()!=out.isComplex())
2277 {
2278 throw RipleyException("Programmer Error: in and out parameters do not have the same complexity.");
2279 }
2280 if (in.isComplex())
2281 {
2282 interpolateNodesOnFacesWorker(out, in, reduced, escript::DataTypes::cplx_t(0));
2283 }
2284 else
2285 {
2286 interpolateNodesOnFacesWorker(out, in, reduced, escript::DataTypes::real_t(0));
2287 }
2288 }
2289
// private
/// Interpolates nodal data onto element quadrature points.
/// \param out output Data (one sample per element)
/// \param in input Data on nodes
/// \param reduced if true a single quadrature point per element is used,
///                otherwise the full 2x2 Gauss scheme
/// \param sentinel dummy value whose type selects real/complex sample access
template <typename S>
void Rectangle::interpolateNodesOnElementsWorker(escript::Data& out,
                                                 const escript::Data& in,
                                                 bool reduced, S sentinel) const
{
    const dim_t numComp = in.getDataPointSize();
    const dim_t NE0 = m_NE[0];
    const dim_t NE1 = m_NE[1];
    if (reduced) {
        out.requireWrite();
        // single centre point: plain average of the four corner values
        const S c0 = 0.25;
#pragma omp parallel
        {
            // per-thread buffers for the element's four corner node values
            vector<S> f_00(numComp);
            vector<S> f_01(numComp);
            vector<S> f_10(numComp);
            vector<S> f_11(numComp);
#pragma omp for
            for (index_t k1=0; k1 < NE1; ++k1) {
                for (index_t k0=0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
                    S* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]), sentinel);
                    for (index_t i=0; i < numComp; ++i) {
                        o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
                    } /* end of component loop i */
                } /* end of k0 loop */
            } /* end of k1 loop */
        } /* end of parallel section */
    } else {
        out.requireWrite();
        // bilinear shape function values at the 2x2 Gauss points (+-1/sqrt(3)
        // on the reference element):
        //   c2 = ((1+1/sqrt(3))/2)^2   (corner nearest the Gauss point)
        //   c1 = ((1-1/sqrt(3))/2)^2   (opposite corner)
        //   c0 = product of the two 1D factors = 1/6 (adjacent corners)
        const S c0 = 0.16666666666666666667;
        const S c1 = 0.044658198738520451079;
        const S c2 = 0.62200846792814621559;
#pragma omp parallel
        {
            // per-thread buffers for the element's four corner node values
            vector<S> f_00(numComp);
            vector<S> f_01(numComp);
            vector<S> f_10(numComp);
            vector<S> f_11(numComp);
#pragma omp for
            for (index_t k1=0; k1 < NE1; ++k1) {
                for (index_t k0=0; k0 < NE0; ++k0) {
                    memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0]), sentinel), numComp*sizeof(S));
                    memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
                    S* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]), sentinel);
                    for (index_t i=0; i < numComp; ++i) {
                        o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
                        o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
                        o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
                        o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
                    } /* end of component loop i */
                } /* end of k0 loop */
            } /* end of k1 loop */
        } /* end of parallel section */
    }
}
2352
2353 //private
2354 template <typename S>
2355 void Rectangle::interpolateNodesOnFacesWorker(escript::Data& out,
2356 const escript::Data& in,
2357 bool reduced, S sentinel) const
2358 {
2359 const dim_t numComp = in.getDataPointSize();
2360 const dim_t NE0 = m_NE[0];
2361 const dim_t NE1 = m_NE[1];
2362 if (reduced) {
2363 out.requireWrite();
2364 #pragma omp parallel
2365 {
2366 vector<S> f_00(numComp);
2367 vector<S> f_01(numComp);
2368 vector<S> f_10(numComp);
2369 vector<S> f_11(numComp);
2370 if (m_faceOffset[0] > -1) {
2371 #pragma omp for nowait
2372 for (index_t k1=0; k1 < NE1; ++k1) {
2373 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0]), sentinel), numComp*sizeof(S));
2374 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
2375 S* o = out.getSampleDataRW(m_faceOffset[0]+k1, sentinel);
2376 for (index_t i=0; i < numComp; ++i) {
2377 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/static_cast<S>(2);
2378 } /* end of component loop i */
2379 } /* end of k1 loop */
2380 } /* end of face 0 */
2381 if (m_faceOffset[1] > -1) {
2382 #pragma omp for nowait
2383 for (index_t k1=0; k1 < NE1; ++k1) {
2384 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0]), sentinel), numComp*sizeof(S));
2385 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0]), sentinel), numComp*sizeof(S));
2386 S* o = out.getSampleDataRW(m_faceOffset[1]+k1, sentinel);
2387 for (index_t i=0; i < numComp; ++i) {
2388 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/static_cast<S>(2);
2389 } /* end of component loop i */
2390 } /* end of k1 loop */
2391 } /* end of face 1 */
2392 if (m_faceOffset[2] > -1) {
2393 #pragma omp for nowait
2394 for (index_t k0=0; k0 < NE0; ++k0) {
2395 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0]), sentinel), numComp*sizeof(S));
2396 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0]), sentinel), numComp*sizeof(S));
2397 S* o = out.getSampleDataRW(m_faceOffset[2]+k0, sentinel);
2398 for (index_t i=0; i < numComp; ++i) {
2399 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/static_cast<S>(2);
2400 } /* end of component loop i */
2401 } /* end of k0 loop */
2402 } /* end of face 2 */
2403 if (m_faceOffset[3] > -1) {
2404 #pragma omp for nowait
2405 for (index_t k0=0; k0 < NE0; ++k0) {
2406 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0]), sentinel), numComp*sizeof(S));
2407 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0]), sentinel), numComp*sizeof(S));
2408 S* o = out.getSampleDataRW(m_faceOffset[3]+k0, sentinel);
2409 for (index_t i=0; i < numComp; ++i) {
2410 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/static_cast<S>(2);
2411 } /* end of component loop i */
2412 } /* end of k0 loop */
2413 } /* end of face 3 */
2414 } /* end of parallel section */
2415 } else {
2416 out.requireWrite();
2417 const S c0 = 0.21132486540518711775;
2418 const S c1 = 0.78867513459481288225;
2419 #pragma omp parallel
2420 {
2421 vector<S> f_00(numComp);
2422 vector<S> f_01(numComp);
2423 vector<S> f_10(numComp);
2424 vector<S> f_11(numComp);
2425 if (m_faceOffset[0] > -1) {
2426 #pragma omp for nowait
2427 for (index_t k1=0; k1 < NE1; ++k1) {
2428 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0]), sentinel), numComp*sizeof(S));
2429 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0