1 |
|
|
2 |
/***************************************************************************** |
/***************************************************************************** |
3 |
* |
* |
4 |
* Copyright (c) 2003-2013 by University of Queensland |
* Copyright (c) 2003-2014 by University of Queensland |
5 |
* http://www.uq.edu.au |
* http://www.uq.edu.au |
6 |
* |
* |
7 |
* Primary Business: Queensland, Australia |
* Primary Business: Queensland, Australia |
9 |
* http://www.opensource.org/licenses/osl-3.0.php |
* http://www.opensource.org/licenses/osl-3.0.php |
10 |
* |
* |
11 |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
* Development since 2012 by School of Earth Sciences |
* Development 2012-2013 by School of Earth Sciences |
13 |
|
* Development from 2014 by Centre for Geoscience Computing (GeoComp) |
14 |
* |
* |
15 |
*****************************************************************************/ |
*****************************************************************************/ |
16 |
|
|
17 |
|
#include <algorithm> |
18 |
|
|
19 |
#include <ripley/Rectangle.h> |
#include <ripley/Rectangle.h> |
20 |
#include <paso/SystemMatrix.h> |
#include <paso/SystemMatrix.h> |
21 |
#include <esysUtils/esysFileWriter.h> |
#include <esysUtils/esysFileWriter.h> |
22 |
|
#include <ripley/DefaultAssembler2D.h> |
23 |
|
#include <ripley/WaveAssembler2D.h> |
24 |
|
#include <ripley/LameAssembler2D.h> |
25 |
|
#include <ripley/domainhelpers.h> |
26 |
|
#include <boost/scoped_array.hpp> |
27 |
|
#include "esysUtils/EsysRandom.h" |
28 |
|
#include "blocktools.h" |
29 |
|
|
30 |
#ifdef USE_NETCDF |
#ifdef USE_NETCDF |
31 |
#include <netcdfcpp.h> |
#include <netcdfcpp.h> |
46 |
namespace ripley { |
namespace ripley { |
47 |
|
|
48 |
Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, |
Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, |
49 |
double y1, int d0, int d1) : |
double y1, int d0, int d1, |
50 |
|
const std::vector<double>& points, |
51 |
|
const std::vector<int>& tags, |
52 |
|
const simap_t& tagnamestonums) : |
53 |
RipleyDomain(2) |
RipleyDomain(2) |
54 |
{ |
{ |
55 |
// ignore subdivision parameters for serial run |
// ignore subdivision parameters for serial run |
59 |
} |
} |
60 |
|
|
61 |
bool warn=false; |
bool warn=false; |
62 |
// if number of subdivisions is non-positive, try to subdivide by the same |
std::vector<int> factors; |
63 |
// ratio as the number of elements |
int ranks = m_mpiInfo->size; |
64 |
if (d0<=0 && d1<=0) { |
int epr[2] = {n0,n1}; |
65 |
warn=true; |
int d[2] = {d0,d1}; |
66 |
d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1)))); |
if (d0<=0 || d1<=0) { |
67 |
d1=m_mpiInfo->size/d0; |
for (int i = 0; i < 2; i++) { |
68 |
if (d0*d1 != m_mpiInfo->size) { |
if (d[i] < 1) { |
69 |
// ratios not the same so subdivide side with more elements only |
d[i] = 1; |
70 |
if (n0>n1) { |
continue; |
|
d0=0; |
|
|
d1=1; |
|
|
} else { |
|
|
d0=1; |
|
|
d1=0; |
|
71 |
} |
} |
72 |
} |
epr[i] = -1; // can no longer be max |
73 |
} |
//remove |
74 |
if (d0<=0) { |
if (ranks % d[i] != 0) { |
75 |
// d1 is preset, determine d0 - throw further down if result is no good |
throw RipleyException("Invalid number of spatial subdivisions"); |
76 |
d0=m_mpiInfo->size/d1; |
} |
77 |
} else if (d1<=0) { |
ranks /= d[i]; |
78 |
// d0 is preset, determine d1 - throw further down if result is no good |
} |
79 |
d1=m_mpiInfo->size/d0; |
factorise(factors, ranks); |
80 |
|
if (factors.size() != 0) { |
81 |
|
warn = true; |
82 |
|
} |
83 |
|
} |
84 |
|
|
85 |
|
while (factors.size() > 0) { |
86 |
|
int i = epr[0] > epr[1] ? 0 : 1; |
87 |
|
int f = factors.back(); |
88 |
|
factors.pop_back(); |
89 |
|
d[i] *= f; |
90 |
|
epr[i] /= f; |
91 |
} |
} |
92 |
|
d0 = d[0]; d1 = d[1]; |
93 |
|
|
94 |
// ensure number of subdivisions is valid and nodes can be distributed |
// ensure number of subdivisions is valid and nodes can be distributed |
95 |
// among number of ranks |
// among number of ranks |
96 |
if (d0*d1 != m_mpiInfo->size) |
if (d0*d1 != m_mpiInfo->size) |
158 |
|
|
159 |
populateSampleIds(); |
populateSampleIds(); |
160 |
createPattern(); |
createPattern(); |
161 |
|
assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN); |
162 |
|
for (map<string, int>::const_iterator i = tagnamestonums.begin(); |
163 |
|
i != tagnamestonums.end(); i++) { |
164 |
|
setTagMap(i->first, i->second); |
165 |
|
} |
166 |
|
addPoints(tags.size(), &points[0], &tags[0]); |
167 |
} |
} |
168 |
|
|
169 |
Rectangle::~Rectangle() |
Rectangle::~Rectangle() |
170 |
{ |
{ |
171 |
Paso_SystemMatrixPattern_free(m_pattern); |
Paso_SystemMatrixPattern_free(m_pattern); |
172 |
Paso_Connector_free(m_connector); |
Paso_Connector_free(m_connector); |
173 |
|
delete assembler; |
174 |
} |
} |
175 |
|
|
176 |
string Rectangle::getDescription() const |
string Rectangle::getDescription() const |
193 |
} |
} |
194 |
|
|
195 |
void Rectangle::readNcGrid(escript::Data& out, string filename, string varname, |
void Rectangle::readNcGrid(escript::Data& out, string filename, string varname, |
196 |
const vector<int>& first, const vector<int>& numValues, |
const ReaderParameters& params) const |
|
const vector<int>& multiplier) const |
|
197 |
{ |
{ |
198 |
#ifdef USE_NETCDF |
#ifdef USE_NETCDF |
199 |
// check destination function space |
// check destination function space |
208 |
} else |
} else |
209 |
throw RipleyException("readNcGrid(): invalid function space for output data object"); |
throw RipleyException("readNcGrid(): invalid function space for output data object"); |
210 |
|
|
211 |
if (first.size() != 2) |
if (params.first.size() != 2) |
212 |
throw RipleyException("readNcGrid(): argument 'first' must have 2 entries"); |
throw RipleyException("readNcGrid(): argument 'first' must have 2 entries"); |
213 |
|
|
214 |
if (numValues.size() != 2) |
if (params.numValues.size() != 2) |
215 |
throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries"); |
throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries"); |
216 |
|
|
217 |
if (multiplier.size() != 2) |
if (params.multiplier.size() != 2) |
218 |
throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries"); |
throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries"); |
219 |
for (size_t i=0; i<multiplier.size(); i++) |
for (size_t i=0; i<params.multiplier.size(); i++) |
220 |
if (multiplier[i]<1) |
if (params.multiplier[i]<1) |
221 |
throw RipleyException("readNcGrid(): all multipliers must be positive"); |
throw RipleyException("readNcGrid(): all multipliers must be positive"); |
222 |
|
if (params.reverse.size() != 2) |
223 |
|
throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries"); |
224 |
|
|
225 |
// check file existence and size |
// check file existence and size |
226 |
NcFile f(filename.c_str(), NcFile::ReadOnly); |
NcFile f(filename.c_str(), NcFile::ReadOnly); |
237 |
throw RipleyException("readNcGrid(): only scalar data supported"); |
throw RipleyException("readNcGrid(): only scalar data supported"); |
238 |
|
|
239 |
const int dims = var->num_dims(); |
const int dims = var->num_dims(); |
240 |
const long *edges = var->edges(); |
boost::scoped_array<long> edges(var->edges()); |
241 |
|
|
242 |
// is this a slice of the data object (dims!=2)? |
// is this a slice of the data object (dims!=2)? |
243 |
// note the expected ordering of edges (as in numpy: y,x) |
// note the expected ordering of edges (as in numpy: y,x) |
244 |
if ( (dims==2 && (numValues[1] > edges[0] || numValues[0] > edges[1])) |
if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1])) |
245 |
|| (dims==1 && numValues[1]>1) ) { |
|| (dims==1 && params.numValues[1]>1) ) { |
246 |
throw RipleyException("readNcGrid(): not enough data in file"); |
throw RipleyException("readNcGrid(): not enough data in file"); |
247 |
} |
} |
248 |
|
|
249 |
// check if this rank contributes anything |
// check if this rank contributes anything |
250 |
if (first[0] >= m_offset[0]+myN0 || first[0]+numValues[0]*multiplier[0] <= m_offset[0] || |
if (params.first[0] >= m_offset[0]+myN0 || |
251 |
first[1] >= m_offset[1]+myN1 || first[1]+numValues[1]*multiplier[1] <= m_offset[1]) |
params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] || |
252 |
|
params.first[1] >= m_offset[1]+myN1 || |
253 |
|
params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1]) |
254 |
return; |
return; |
255 |
|
|
256 |
// now determine how much this rank has to write |
// now determine how much this rank has to write |
257 |
|
|
258 |
// first coordinates in data object to write to |
// first coordinates in data object to write to |
259 |
const int first0 = max(0, first[0]-m_offset[0]); |
const int first0 = max(0, params.first[0]-m_offset[0]); |
260 |
const int first1 = max(0, first[1]-m_offset[1]); |
const int first1 = max(0, params.first[1]-m_offset[1]); |
261 |
// indices to first value in file |
// indices to first value in file (not accounting for reverse yet) |
262 |
const int idx0 = max(0, m_offset[0]-first[0]); |
int idx0 = max(0, m_offset[0]-params.first[0]); |
263 |
const int idx1 = max(0, m_offset[1]-first[1]); |
int idx1 = max(0, m_offset[1]-params.first[1]); |
264 |
// number of values to read |
// number of values to read |
265 |
const int num0 = min(numValues[0]-idx0, myN0-first0); |
const int num0 = min(params.numValues[0]-idx0, myN0-first0); |
266 |
const int num1 = min(numValues[1]-idx1, myN1-first1); |
const int num1 = min(params.numValues[1]-idx1, myN1-first1); |
267 |
|
|
268 |
|
// make sure we read the right block if going backwards through file |
269 |
|
if (params.reverse[0]) |
270 |
|
idx0 = edges[dims-1]-num0-idx0; |
271 |
|
if (dims>1 && params.reverse[1]) |
272 |
|
idx1 = edges[dims-2]-num1-idx1; |
273 |
|
|
274 |
vector<double> values(num0*num1); |
vector<double> values(num0*num1); |
275 |
if (dims==2) { |
if (dims==2) { |
283 |
const int dpp = out.getNumDataPointsPerSample(); |
const int dpp = out.getNumDataPointsPerSample(); |
284 |
out.requireWrite(); |
out.requireWrite(); |
285 |
|
|
286 |
|
// helpers for reversing |
287 |
|
const int x0 = (params.reverse[0] ? num0-1 : 0); |
288 |
|
const int x_mult = (params.reverse[0] ? -1 : 1); |
289 |
|
const int y0 = (params.reverse[1] ? num1-1 : 0); |
290 |
|
const int y_mult = (params.reverse[1] ? -1 : 1); |
291 |
|
|
292 |
for (index_t y=0; y<num1; y++) { |
for (index_t y=0; y<num1; y++) { |
293 |
#pragma omp parallel for |
#pragma omp parallel for |
294 |
for (index_t x=0; x<num0; x++) { |
for (index_t x=0; x<num0; x++) { |
295 |
const int baseIndex = first0+x*multiplier[0] |
const int baseIndex = first0+x*params.multiplier[0] |
296 |
+(first1+y*multiplier[1])*myN0; |
+(first1+y*params.multiplier[1])*myN0; |
297 |
const int srcIndex = y*num0+x; |
const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x); |
298 |
if (!isnan(values[srcIndex])) { |
if (!isnan(values[srcIndex])) { |
299 |
for (index_t m1=0; m1<multiplier[1]; m1++) { |
for (index_t m1=0; m1<params.multiplier[1]; m1++) { |
300 |
for (index_t m0=0; m0<multiplier[0]; m0++) { |
for (index_t m0=0; m0<params.multiplier[0]; m0++) { |
301 |
const int dataIndex = baseIndex+m0+m1*myN0; |
const int dataIndex = baseIndex+m0+m1*myN0; |
302 |
double* dest = out.getSampleDataRW(dataIndex); |
double* dest = out.getSampleDataRW(dataIndex); |
303 |
for (index_t q=0; q<dpp; q++) { |
for (index_t q=0; q<dpp; q++) { |
314 |
} |
} |
315 |
|
|
316 |
void Rectangle::readBinaryGrid(escript::Data& out, string filename, |
void Rectangle::readBinaryGrid(escript::Data& out, string filename, |
317 |
const vector<int>& first, |
const ReaderParameters& params) const |
318 |
const vector<int>& numValues, |
{ |
319 |
const vector<int>& multiplier) const |
// the mapping is not universally correct but should work on our |
320 |
|
// supported platforms |
321 |
|
switch (params.dataType) { |
322 |
|
case DATATYPE_INT32: |
323 |
|
readBinaryGridImpl<int>(out, filename, params); |
324 |
|
break; |
325 |
|
case DATATYPE_FLOAT32: |
326 |
|
readBinaryGridImpl<float>(out, filename, params); |
327 |
|
break; |
328 |
|
case DATATYPE_FLOAT64: |
329 |
|
readBinaryGridImpl<double>(out, filename, params); |
330 |
|
break; |
331 |
|
default: |
332 |
|
throw RipleyException("readBinaryGrid(): invalid or unsupported datatype"); |
333 |
|
} |
334 |
|
} |
335 |
|
|
336 |
|
void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename, |
337 |
|
const ReaderParameters& params) const |
338 |
|
{ |
339 |
|
// the mapping is not universally correct but should work on our |
340 |
|
// supported platforms |
341 |
|
switch (params.dataType) { |
342 |
|
case DATATYPE_INT32: |
343 |
|
readBinaryGridZippedImpl<int>(out, filename, params); |
344 |
|
break; |
345 |
|
case DATATYPE_FLOAT32: |
346 |
|
readBinaryGridZippedImpl<float>(out, filename, params); |
347 |
|
break; |
348 |
|
case DATATYPE_FLOAT64: |
349 |
|
readBinaryGridZippedImpl<double>(out, filename, params); |
350 |
|
break; |
351 |
|
default: |
352 |
|
throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype"); |
353 |
|
} |
354 |
|
} |
355 |
|
|
356 |
|
template<typename ValueType> |
357 |
|
void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename, |
358 |
|
const ReaderParameters& params) const |
359 |
{ |
{ |
360 |
// check destination function space |
// check destination function space |
361 |
int myN0, myN1; |
int myN0, myN1; |
377 |
f.seekg(0, ios::end); |
f.seekg(0, ios::end); |
378 |
const int numComp = out.getDataPointSize(); |
const int numComp = out.getDataPointSize(); |
379 |
const int filesize = f.tellg(); |
const int filesize = f.tellg(); |
380 |
const int reqsize = numValues[0]*numValues[1]*numComp*sizeof(float); |
const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); |
381 |
if (filesize < reqsize) { |
if (filesize < reqsize) { |
382 |
f.close(); |
f.close(); |
383 |
throw RipleyException("readBinaryGrid(): not enough data in file"); |
throw RipleyException("readBinaryGrid(): not enough data in file"); |
384 |
} |
} |
385 |
|
|
386 |
// check if this rank contributes anything |
// check if this rank contributes anything |
387 |
if (first[0] >= m_offset[0]+myN0 || first[0]+numValues[0] <= m_offset[0] || |
if (params.first[0] >= m_offset[0]+myN0 || |
388 |
first[1] >= m_offset[1]+myN1 || first[1]+numValues[1] <= m_offset[1]) { |
params.first[0]+params.numValues[0] <= m_offset[0] || |
389 |
|
params.first[1] >= m_offset[1]+myN1 || |
390 |
|
params.first[1]+params.numValues[1] <= m_offset[1]) { |
391 |
f.close(); |
f.close(); |
392 |
return; |
return; |
393 |
} |
} |
395 |
// now determine how much this rank has to write |
// now determine how much this rank has to write |
396 |
|
|
397 |
// first coordinates in data object to write to |
// first coordinates in data object to write to |
398 |
const int first0 = max(0, first[0]-m_offset[0]); |
const int first0 = max(0, params.first[0]-m_offset[0]); |
399 |
const int first1 = max(0, first[1]-m_offset[1]); |
const int first1 = max(0, params.first[1]-m_offset[1]); |
400 |
// indices to first value in file |
// indices to first value in file |
401 |
const int idx0 = max(0, m_offset[0]-first[0]); |
const int idx0 = max(0, m_offset[0]-params.first[0]); |
402 |
const int idx1 = max(0, m_offset[1]-first[1]); |
const int idx1 = max(0, m_offset[1]-params.first[1]); |
403 |
// number of values to read |
// number of values to read |
404 |
const int num0 = min(numValues[0]-idx0, myN0-first0); |
const int num0 = min(params.numValues[0]-idx0, myN0-first0); |
405 |
const int num1 = min(numValues[1]-idx1, myN1-first1); |
const int num1 = min(params.numValues[1]-idx1, myN1-first1); |
406 |
|
|
407 |
out.requireWrite(); |
out.requireWrite(); |
408 |
vector<float> values(num0*numComp); |
vector<ValueType> values(num0*numComp); |
409 |
const int dpp = out.getNumDataPointsPerSample(); |
const int dpp = out.getNumDataPointsPerSample(); |
410 |
|
|
411 |
for (index_t y=0; y<num1; y++) { |
for (int y=0; y<num1; y++) { |
412 |
const int fileofs = numComp*(idx0+(idx1+y)*numValues[0]); |
const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); |
413 |
f.seekg(fileofs*sizeof(float)); |
f.seekg(fileofs*sizeof(ValueType)); |
414 |
f.read((char*)&values[0], num0*numComp*sizeof(float)); |
f.read((char*)&values[0], num0*numComp*sizeof(ValueType)); |
415 |
for (index_t x=0; x<num0; x++) { |
for (int x=0; x<num0; x++) { |
416 |
const int baseIndex = first0+x*multiplier[0] |
const int baseIndex = first0+x*params.multiplier[0] |
417 |
+(first1+y*multiplier[1])*myN0; |
+(first1+y*params.multiplier[1])*myN0; |
418 |
for (index_t m1=0; m1<multiplier[1]; m1++) { |
for (int m1=0; m1<params.multiplier[1]; m1++) { |
419 |
for (index_t m0=0; m0<multiplier[0]; m0++) { |
for (int m0=0; m0<params.multiplier[0]; m0++) { |
420 |
|
const int dataIndex = baseIndex+m0+m1*myN0; |
421 |
|
double* dest = out.getSampleDataRW(dataIndex); |
422 |
|
for (int c=0; c<numComp; c++) { |
423 |
|
ValueType val = values[x*numComp+c]; |
424 |
|
|
425 |
|
if (params.byteOrder != BYTEORDER_NATIVE) { |
426 |
|
char* cval = reinterpret_cast<char*>(&val); |
427 |
|
// this will alter val!! |
428 |
|
byte_swap32(cval); |
429 |
|
} |
430 |
|
if (!std::isnan(val)) { |
431 |
|
for (int q=0; q<dpp; q++) { |
432 |
|
*dest++ = static_cast<double>(val); |
433 |
|
} |
434 |
|
} |
435 |
|
} |
436 |
|
} |
437 |
|
} |
438 |
|
} |
439 |
|
} |
440 |
|
|
441 |
|
f.close(); |
442 |
|
} |
443 |
|
|
444 |
|
template<typename ValueType> |
445 |
|
void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename, |
446 |
|
const ReaderParameters& params) const |
447 |
|
{ |
448 |
|
// check destination function space |
449 |
|
int myN0, myN1; |
450 |
|
if (out.getFunctionSpace().getTypeCode() == Nodes) { |
451 |
|
myN0 = m_NN[0]; |
452 |
|
myN1 = m_NN[1]; |
453 |
|
} else if (out.getFunctionSpace().getTypeCode() == Elements || |
454 |
|
out.getFunctionSpace().getTypeCode() == ReducedElements) { |
455 |
|
myN0 = m_NE[0]; |
456 |
|
myN1 = m_NE[1]; |
457 |
|
} else |
458 |
|
throw RipleyException("readBinaryGrid(): invalid function space for output data object"); |
459 |
|
|
460 |
|
// check file existence and size |
461 |
|
ifstream f(filename.c_str(), ifstream::binary); |
462 |
|
if (f.fail()) { |
463 |
|
throw RipleyException("readBinaryGridFromZipped(): cannot open file"); |
464 |
|
} |
465 |
|
f.seekg(0, ios::end); |
466 |
|
const int numComp = out.getDataPointSize(); |
467 |
|
int filesize = f.tellg(); |
468 |
|
f.seekg(0, ios::beg); |
469 |
|
std::vector<char> compressed(filesize); |
470 |
|
f.read((char*)&compressed[0], filesize); |
471 |
|
f.close(); |
472 |
|
std::vector<char> decompressed = unzip(compressed); |
473 |
|
filesize = decompressed.size(); |
474 |
|
const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); |
475 |
|
if (filesize < reqsize) { |
476 |
|
throw RipleyException("readBinaryGridFromZipped(): not enough data in file"); |
477 |
|
} |
478 |
|
|
479 |
|
// check if this rank contributes anything |
480 |
|
if (params.first[0] >= m_offset[0]+myN0 || |
481 |
|
params.first[0]+params.numValues[0] <= m_offset[0] || |
482 |
|
params.first[1] >= m_offset[1]+myN1 || |
483 |
|
params.first[1]+params.numValues[1] <= m_offset[1]) { |
484 |
|
f.close(); |
485 |
|
return; |
486 |
|
} |
487 |
|
|
488 |
|
// now determine how much this rank has to write |
489 |
|
|
490 |
|
// first coordinates in data object to write to |
491 |
|
const int first0 = max(0, params.first[0]-m_offset[0]); |
492 |
|
const int first1 = max(0, params.first[1]-m_offset[1]); |
493 |
|
// indices to first value in file |
494 |
|
const int idx0 = max(0, m_offset[0]-params.first[0]); |
495 |
|
const int idx1 = max(0, m_offset[1]-params.first[1]); |
496 |
|
// number of values to read |
497 |
|
const int num0 = min(params.numValues[0]-idx0, myN0-first0); |
498 |
|
const int num1 = min(params.numValues[1]-idx1, myN1-first1); |
499 |
|
|
500 |
|
out.requireWrite(); |
501 |
|
vector<ValueType> values(num0*numComp); |
502 |
|
const int dpp = out.getNumDataPointsPerSample(); |
503 |
|
|
504 |
|
for (int y=0; y<num1; y++) { |
505 |
|
const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); |
506 |
|
memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType)); |
507 |
|
for (int x=0; x<num0; x++) { |
508 |
|
const int baseIndex = first0+x*params.multiplier[0] |
509 |
|
+(first1+y*params.multiplier[1])*myN0; |
510 |
|
for (int m1=0; m1<params.multiplier[1]; m1++) { |
511 |
|
for (int m0=0; m0<params.multiplier[0]; m0++) { |
512 |
const int dataIndex = baseIndex+m0+m1*myN0; |
const int dataIndex = baseIndex+m0+m1*myN0; |
513 |
double* dest = out.getSampleDataRW(dataIndex); |
double* dest = out.getSampleDataRW(dataIndex); |
514 |
for (index_t c=0; c<numComp; c++) { |
for (int c=0; c<numComp; c++) { |
515 |
if (!isnan(values[x*numComp+c])) { |
ValueType val = values[x*numComp+c]; |
516 |
for (index_t q=0; q<dpp; q++) { |
|
517 |
*dest++ = static_cast<double>(values[x*numComp+c]); |
if (params.byteOrder != BYTEORDER_NATIVE) { |
518 |
|
char* cval = reinterpret_cast<char*>(&val); |
519 |
|
// this will alter val!! |
520 |
|
byte_swap32(cval); |
521 |
|
} |
522 |
|
if (!std::isnan(val)) { |
523 |
|
for (int q=0; q<dpp; q++) { |
524 |
|
*dest++ = static_cast<double>(val); |
525 |
} |
} |
526 |
} |
} |
527 |
} |
} |
533 |
f.close(); |
f.close(); |
534 |
} |
} |
535 |
|
|
536 |
void Rectangle::writeBinaryGrid(const escript::Data& in, string filename, int byteOrder) const |
void Rectangle::writeBinaryGrid(const escript::Data& in, string filename, |
537 |
|
int byteOrder, int dataType) const |
538 |
|
{ |
539 |
|
// the mapping is not universally correct but should work on our |
540 |
|
// supported platforms |
541 |
|
switch (dataType) { |
542 |
|
case DATATYPE_INT32: |
543 |
|
writeBinaryGridImpl<int>(in, filename, byteOrder); |
544 |
|
break; |
545 |
|
case DATATYPE_FLOAT32: |
546 |
|
writeBinaryGridImpl<float>(in, filename, byteOrder); |
547 |
|
break; |
548 |
|
case DATATYPE_FLOAT64: |
549 |
|
writeBinaryGridImpl<double>(in, filename, byteOrder); |
550 |
|
break; |
551 |
|
default: |
552 |
|
throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype"); |
553 |
|
} |
554 |
|
} |
555 |
|
|
556 |
|
template<typename ValueType> |
557 |
|
void Rectangle::writeBinaryGridImpl(const escript::Data& in, |
558 |
|
const string& filename, int byteOrder) const |
559 |
{ |
{ |
560 |
// check function space and determine number of points |
// check function space and determine number of points |
561 |
int myN0, myN1; |
int myN0, myN1; |
576 |
|
|
577 |
const int numComp = in.getDataPointSize(); |
const int numComp = in.getDataPointSize(); |
578 |
const int dpp = in.getNumDataPointsPerSample(); |
const int dpp = in.getNumDataPointsPerSample(); |
|
const int fileSize = sizeof(float)*numComp*dpp*totalN0*totalN1; |
|
579 |
|
|
580 |
if (numComp > 1 || dpp > 1) |
if (numComp > 1 || dpp > 1) |
581 |
throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported"); |
throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported"); |
582 |
|
|
583 |
escript::Data* _in = const_cast<escript::Data*>(&in); |
const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1; |
584 |
|
|
585 |
// from here on we know that each sample consists of one value |
// from here on we know that each sample consists of one value |
586 |
FileWriter* fw = new FileWriter(); |
FileWriter fw; |
587 |
fw->openFile(filename, fileSize); |
fw.openFile(filename, fileSize); |
588 |
MPIBarrier(); |
MPIBarrier(); |
589 |
|
|
590 |
for (index_t y=0; y<myN1; y++) { |
for (index_t y=0; y<myN1; y++) { |
591 |
const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(float); |
const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType); |
592 |
ostringstream oss; |
ostringstream oss; |
593 |
|
|
594 |
for (index_t x=0; x<myN0; x++) { |
for (index_t x=0; x<myN0; x++) { |
595 |
const double* sample = _in->getSampleDataRO(y*myN0+x); |
const double* sample = in.getSampleDataRO(y*myN0+x); |
596 |
float fvalue = (float)(*sample); |
ValueType fvalue = static_cast<ValueType>(*sample); |
597 |
if (byteOrder == RIPLEY_BYTE_ORDER) { |
if (byteOrder == BYTEORDER_NATIVE) { |
598 |
oss.write((char*)&fvalue, sizeof(fvalue)); |
oss.write((char*)&fvalue, sizeof(fvalue)); |
599 |
} else { |
} else { |
600 |
char* value = reinterpret_cast<char*>(&fvalue); |
char* value = reinterpret_cast<char*>(&fvalue); |
601 |
oss.write(RIPLEY_BYTE_SWAP32(value), sizeof(fvalue)); |
oss.write(byte_swap32(value), sizeof(fvalue)); |
602 |
} |
} |
603 |
} |
} |
604 |
fw->writeAt(oss, fileofs); |
fw.writeAt(oss, fileofs); |
605 |
} |
} |
606 |
fw->close(); |
fw.close(); |
607 |
} |
} |
608 |
|
|
609 |
void Rectangle::dump(const string& fileName) const |
void Rectangle::dump(const string& fileName) const |
763 |
case FaceElements: |
case FaceElements: |
764 |
case ReducedFaceElements: |
case ReducedFaceElements: |
765 |
return &m_faceId[0]; |
return &m_faceId[0]; |
766 |
|
case Points: |
767 |
|
return &m_diracPointNodeIDs[0]; |
768 |
default: |
default: |
769 |
break; |
break; |
770 |
} |
} |
1022 |
} |
} |
1023 |
|
|
1024 |
//protected |
//protected |
1025 |
void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const |
void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const |
1026 |
{ |
{ |
1027 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1028 |
const double cx0 = -1./m_dx[0]; |
const double cx0 = .21132486540518711775/m_dx[0]; |
1029 |
const double cx1 = -.78867513459481288225/m_dx[0]; |
const double cx1 = .78867513459481288225/m_dx[0]; |
1030 |
const double cx2 = -.5/m_dx[0]; |
const double cx2 = 1./m_dx[0]; |
1031 |
const double cx3 = -.21132486540518711775/m_dx[0]; |
const double cy0 = .21132486540518711775/m_dx[1]; |
1032 |
const double cx4 = .21132486540518711775/m_dx[0]; |
const double cy1 = .78867513459481288225/m_dx[1]; |
1033 |
const double cx5 = .5/m_dx[0]; |
const double cy2 = 1./m_dx[1]; |
|
const double cx6 = .78867513459481288225/m_dx[0]; |
|
|
const double cx7 = 1./m_dx[0]; |
|
|
const double cy0 = -1./m_dx[1]; |
|
|
const double cy1 = -.78867513459481288225/m_dx[1]; |
|
|
const double cy2 = -.5/m_dx[1]; |
|
|
const double cy3 = -.21132486540518711775/m_dx[1]; |
|
|
const double cy4 = .21132486540518711775/m_dx[1]; |
|
|
const double cy5 = .5/m_dx[1]; |
|
|
const double cy6 = .78867513459481288225/m_dx[1]; |
|
|
const double cy7 = 1./m_dx[1]; |
|
1034 |
|
|
1035 |
if (out.getFunctionSpace().getTypeCode() == Elements) { |
if (out.getFunctionSpace().getTypeCode() == Elements) { |
1036 |
out.requireWrite(); |
out.requireWrite(); |
1049 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1050 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1051 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1052 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1053 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1054 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1055 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1056 |
o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1057 |
o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1058 |
o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1059 |
o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1060 |
} // end of component loop i |
} // end of component loop i |
1061 |
} // end of k0 loop |
} // end of k0 loop |
1062 |
} // end of k1 loop |
} // end of k1 loop |
1078 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1079 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1080 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1081 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1082 |
o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1083 |
} // end of component loop i |
} // end of component loop i |
1084 |
} // end of k0 loop |
} // end of k0 loop |
1085 |
} // end of k1 loop |
} // end of k1 loop |
1101 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1102 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1103 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1104 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1105 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1106 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1107 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1108 |
} // end of component loop i |
} // end of component loop i |
1109 |
} // end of k1 loop |
} // end of k1 loop |
1110 |
} // end of face 0 |
} // end of face 0 |
1117 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1118 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1119 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1120 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1121 |
o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1122 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1123 |
o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1124 |
} // end of component loop i |
} // end of component loop i |
1125 |
} // end of k1 loop |
} // end of k1 loop |
1126 |
} // end of face 1 |
} // end of face 1 |
1133 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1134 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1135 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1136 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1137 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1138 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1139 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1140 |
} // end of component loop i |
} // end of component loop i |
1141 |
} // end of k0 loop |
} // end of k0 loop |
1142 |
} // end of face 2 |
} // end of face 2 |
1149 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1150 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1151 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1152 |
o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1153 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1154 |
o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1155 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1156 |
} // end of component loop i |
} // end of component loop i |
1157 |
} // end of k0 loop |
} // end of k0 loop |
1158 |
} // end of face 3 |
} // end of face 3 |
1175 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1176 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1177 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1178 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1179 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1180 |
} // end of component loop i |
} // end of component loop i |
1181 |
} // end of k1 loop |
} // end of k1 loop |
1182 |
} // end of face 0 |
} // end of face 0 |
1189 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1190 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1191 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1192 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1193 |
o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1194 |
} // end of component loop i |
} // end of component loop i |
1195 |
} // end of k1 loop |
} // end of k1 loop |
1196 |
} // end of face 1 |
} // end of face 1 |
1203 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1204 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1205 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1206 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1207 |
o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1208 |
} // end of component loop i |
} // end of component loop i |
1209 |
} // end of k0 loop |
} // end of k0 loop |
1210 |
} // end of face 2 |
} // end of face 2 |
1217 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1218 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1219 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1220 |
o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1221 |
o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]); |
o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1222 |
} // end of component loop i |
} // end of component loop i |
1223 |
} // end of k0 loop |
} // end of k0 loop |
1224 |
} // end of face 3 |
} // end of face 3 |
1227 |
} |
} |
1228 |
|
|
1229 |
//protected |
//protected |
1230 |
void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const |
void Rectangle::assembleIntegrate(vector<double>& integrals, |
1231 |
|
const escript::Data& arg) const |
1232 |
{ |
{ |
1233 |
const dim_t numComp = arg.getDataPointSize(); |
const dim_t numComp = arg.getDataPointSize(); |
1234 |
const index_t left = (m_offset[0]==0 ? 0 : 1); |
const index_t left = (m_offset[0]==0 ? 0 : 1); |
1414 |
} |
} |
1415 |
|
|
1416 |
//protected |
//protected |
1417 |
void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const |
void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const |
1418 |
{ |
{ |
1419 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1420 |
out.requireWrite(); |
out.requireWrite(); |
1434 |
} |
} |
1435 |
|
|
1436 |
//protected |
//protected |
1437 |
void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const |
void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const |
1438 |
{ |
{ |
1439 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1440 |
Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
1441 |
in.requireWrite(); |
// expand data object if necessary to be able to grab the whole data |
1442 |
Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); |
const_cast<escript::Data*>(&in)->expand(); |
1443 |
|
Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0)); |
1444 |
|
|
1445 |
const dim_t numDOF = getNumDOF(); |
const dim_t numDOF = getNumDOF(); |
1446 |
out.requireWrite(); |
out.requireWrite(); |
1635 |
RankVector neighbour; |
RankVector neighbour; |
1636 |
IndexVector offsetInShared(1,0); |
IndexVector offsetInShared(1,0); |
1637 |
IndexVector sendShared, recvShared; |
IndexVector sendShared, recvShared; |
1638 |
int numShared=0; |
int numShared=0, expectedShared=0; |
1639 |
const int x=m_mpiInfo->rank%m_NX[0]; |
const int x=m_mpiInfo->rank%m_NX[0]; |
1640 |
const int y=m_mpiInfo->rank/m_NX[0]; |
const int y=m_mpiInfo->rank/m_NX[0]; |
1641 |
|
if (x > 0) |
1642 |
|
expectedShared += nDOF1; |
1643 |
|
if (x < m_NX[0] - 1) |
1644 |
|
expectedShared += nDOF1; |
1645 |
|
if (y > 0) |
1646 |
|
expectedShared += nDOF0; |
1647 |
|
if (y < m_NX[1] - 1) |
1648 |
|
expectedShared += nDOF0; |
1649 |
|
if (x > 0 && y > 0) expectedShared++; |
1650 |
|
if (x > 0 && y < m_NX[1] - 1) expectedShared++; |
1651 |
|
if (x < m_NX[0] - 1 && y > 0) expectedShared++; |
1652 |
|
if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++; |
1653 |
|
|
1654 |
|
vector<IndexVector> rowIndices(expectedShared); |
1655 |
|
|
1656 |
for (int i1=-1; i1<2; i1++) { |
for (int i1=-1; i1<2; i1++) { |
1657 |
for (int i0=-1; i0<2; i0++) { |
for (int i0=-1; i0<2; i0++) { |
1658 |
// skip this rank |
// skip this rank |
1672 |
sendShared.push_back(firstDOF+i); |
sendShared.push_back(firstDOF+i); |
1673 |
recvShared.push_back(numDOF+numShared); |
recvShared.push_back(numDOF+numShared); |
1674 |
if (i>0) |
if (i>0) |
1675 |
colIndices[firstDOF+i-1].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared); |
1676 |
colIndices[firstDOF+i].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+i, numShared); |
1677 |
if (i<nDOF0-1) |
if (i<nDOF0-1) |
1678 |
colIndices[firstDOF+i+1].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared); |
1679 |
m_dofMap[firstNode+i]=numDOF+numShared; |
m_dofMap[firstNode+i]=numDOF+numShared; |
1680 |
} |
} |
1681 |
} else if (i1==0) { |
} else if (i1==0) { |
1687 |
sendShared.push_back(firstDOF+i*nDOF0); |
sendShared.push_back(firstDOF+i*nDOF0); |
1688 |
recvShared.push_back(numDOF+numShared); |
recvShared.push_back(numDOF+numShared); |
1689 |
if (i>0) |
if (i>0) |
1690 |
colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared); |
1691 |
colIndices[firstDOF+i*nDOF0].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared); |
1692 |
if (i<nDOF1-1) |
if (i<nDOF1-1) |
1693 |
colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); |
doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared); |
1694 |
m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared; |
m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared; |
1695 |
} |
} |
1696 |
} else { |
} else { |
1700 |
offsetInShared.push_back(offsetInShared.back()+1); |
offsetInShared.push_back(offsetInShared.back()+1); |
1701 |
sendShared.push_back(dof); |
sendShared.push_back(dof); |
1702 |
recvShared.push_back(numDOF+numShared); |
recvShared.push_back(numDOF+numShared); |
1703 |
colIndices[dof].push_back(numShared); |
doublyLink(colIndices, rowIndices, dof, numShared); |
1704 |
m_dofMap[node]=numDOF+numShared; |
m_dofMap[node]=numDOF+numShared; |
1705 |
++numShared; |
++numShared; |
1706 |
} |
} |
1707 |
} |
} |
1708 |
} |
} |
1709 |
} |
} |
1710 |
|
|
1711 |
|
#pragma omp parallel for |
1712 |
|
for (int i = 0; i < numShared; i++) { |
1713 |
|
std::sort(rowIndices[i].begin(), rowIndices[i].end()); |
1714 |
|
} |
1715 |
|
|
1716 |
// create connector |
// create connector |
1717 |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
1718 |
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
1727 |
// create main and couple blocks |
// create main and couple blocks |
1728 |
Paso_Pattern *mainPattern = createMainPattern(); |
Paso_Pattern *mainPattern = createMainPattern(); |
1729 |
Paso_Pattern *colPattern, *rowPattern; |
Paso_Pattern *colPattern, *rowPattern; |
1730 |
createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); |
createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern); |
1731 |
|
|
1732 |
// allocate paso distribution |
// allocate paso distribution |
1733 |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
1830 |
|
|
1831 |
//protected |
//protected |
1832 |
void Rectangle::interpolateNodesOnElements(escript::Data& out, |
void Rectangle::interpolateNodesOnElements(escript::Data& out, |
1833 |
escript::Data& in, bool reduced) const |
const escript::Data& in, |
1834 |
|
bool reduced) const |
1835 |
{ |
{ |
1836 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1837 |
if (reduced) { |
if (reduced) { |
1889 |
} |
} |
1890 |
|
|
1891 |
//protected |
//protected |
1892 |
void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in, |
void Rectangle::interpolateNodesOnFaces(escript::Data& out, |
1893 |
|
const escript::Data& in, |
1894 |
bool reduced) const |
bool reduced) const |
1895 |
{ |
{ |
1896 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1897 |
if (reduced) { |
if (reduced) { |
1898 |
out.requireWrite(); |
out.requireWrite(); |
|
const double c0 = 0.5; |
|
1899 |
#pragma omp parallel |
#pragma omp parallel |
1900 |
{ |
{ |
1901 |
vector<double> f_00(numComp); |
vector<double> f_00(numComp); |
1909 |
memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1910 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1911 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1912 |
o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); |
o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2; |
1913 |
} /* end of component loop i */ |
} /* end of component loop i */ |
1914 |
} /* end of k1 loop */ |
} /* end of k1 loop */ |
1915 |
} /* end of face 0 */ |
} /* end of face 0 */ |
1920 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1921 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1922 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1923 |
o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); |
o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2; |
1924 |
} /* end of component loop i */ |
} /* end of component loop i */ |
1925 |
} /* end of k1 loop */ |
} /* end of k1 loop */ |
1926 |
} /* end of face 1 */ |
} /* end of face 1 */ |
1931 |
memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1932 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1933 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1934 |
o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); |
o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2; |
1935 |
} /* end of component loop i */ |
} /* end of component loop i */ |
1936 |
} /* end of k0 loop */ |
} /* end of k0 loop */ |
1937 |
} /* end of face 2 */ |
} /* end of face 2 */ |
1942 |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1943 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1944 |
for (index_t i=0; i < numComp; ++i) { |
for (index_t i=0; i < numComp; ++i) { |
1945 |
o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); |
o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2; |
1946 |
} /* end of component loop i */ |
} /* end of component loop i */ |
1947 |
} /* end of k0 loop */ |
} /* end of k0 loop */ |
1948 |
} /* end of face 3 */ |
} /* end of face 3 */ |
2009 |
} |
} |
2010 |
} |
} |
2011 |
|
|
|
//protected |
|
|
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
|
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y) const |
|
|
{ |
|
|
const double w0 = -0.1555021169820365539*m_dx[1]/m_dx[0]; |
|
|
const double w1 = 0.041666666666666666667; |
|
|
const double w2 = -0.15550211698203655390; |
|
|
const double w3 = 0.041666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w4 = 0.15550211698203655390; |
|
|
const double w5 = -0.041666666666666666667; |
|
|
const double w6 = -0.01116454968463011277*m_dx[1]/m_dx[0]; |
|
|
const double w7 = 0.011164549684630112770; |
|
|
const double w8 = -0.011164549684630112770; |
|
|
const double w9 = -0.041666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w10 = -0.041666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w11 = 0.1555021169820365539*m_dx[1]/m_dx[0]; |
|
|
const double w12 = 0.1555021169820365539*m_dx[0]/m_dx[1]; |
|
|
const double w13 = 0.01116454968463011277*m_dx[0]/m_dx[1]; |
|
|
const double w14 = 0.01116454968463011277*m_dx[1]/m_dx[0]; |
|
|
const double w15 = 0.041666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w16 = -0.01116454968463011277*m_dx[0]/m_dx[1]; |
|
|
const double w17 = -0.1555021169820365539*m_dx[0]/m_dx[1]; |
|
|
const double w18 = -0.33333333333333333333*m_dx[1]/m_dx[0]; |
|
|
const double w19 = 0.25; |
|
|
const double w20 = -0.25; |
|
|
const double w21 = 0.16666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w22 = -0.16666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w23 = -0.16666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w24 = 0.33333333333333333333*m_dx[1]/m_dx[0]; |
|
|
const double w25 = 0.33333333333333333333*m_dx[0]/m_dx[1]; |
|
|
const double w26 = 0.16666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w27 = -0.33333333333333333333*m_dx[0]/m_dx[1]; |
|
|
const double w28 = -0.032861463941450536761*m_dx[1]; |
|
|
const double w29 = -0.032861463941450536761*m_dx[0]; |
|
|
const double w30 = -0.12264065304058601714*m_dx[1]; |
|
|
const double w31 = -0.0023593469594139828636*m_dx[1]; |
|
|
const double w32 = -0.008805202725216129906*m_dx[0]; |
|
|
const double w33 = -0.008805202725216129906*m_dx[1]; |
|
|
const double w34 = 0.032861463941450536761*m_dx[1]; |
|
|
const double w35 = 0.008805202725216129906*m_dx[1]; |
|
|
const double w36 = 0.008805202725216129906*m_dx[0]; |
|
|
const double w37 = 0.0023593469594139828636*m_dx[1]; |
|
|
const double w38 = 0.12264065304058601714*m_dx[1]; |
|
|
const double w39 = 0.032861463941450536761*m_dx[0]; |
|
|
const double w40 = -0.12264065304058601714*m_dx[0]; |
|
|
const double w41 = -0.0023593469594139828636*m_dx[0]; |
|
|
const double w42 = 0.0023593469594139828636*m_dx[0]; |
|
|
const double w43 = 0.12264065304058601714*m_dx[0]; |
|
|
const double w44 = -0.16666666666666666667*m_dx[1]; |
|
|
const double w45 = -0.083333333333333333333*m_dx[0]; |
|
|
const double w46 = 0.083333333333333333333*m_dx[1]; |
|
|
const double w47 = 0.16666666666666666667*m_dx[1]; |
|
|
const double w48 = 0.083333333333333333333*m_dx[0]; |
|
|
const double w49 = -0.16666666666666666667*m_dx[0]; |
|
|
const double w50 = 0.16666666666666666667*m_dx[0]; |
|
|
const double w51 = -0.083333333333333333333*m_dx[1]; |
|
|
const double w52 = 0.025917019497006092316*m_dx[0]*m_dx[1]; |
|
|
const double w53 = 0.0018607582807716854616*m_dx[0]*m_dx[1]; |
|
|
const double w54 = 0.0069444444444444444444*m_dx[0]*m_dx[1]; |
|
|
const double w55 = 0.09672363354357992482*m_dx[0]*m_dx[1]; |
|
|
const double w56 = 0.00049858867864229740201*m_dx[0]*m_dx[1]; |
|
|
const double w57 = 0.055555555555555555556*m_dx[0]*m_dx[1]; |
|
|
const double w58 = 0.027777777777777777778*m_dx[0]*m_dx[1]; |
|
|
const double w59 = 0.11111111111111111111*m_dx[0]*m_dx[1]; |
|
|
const double w60 = -0.19716878364870322056*m_dx[1]; |
|
|
const double w61 = -0.19716878364870322056*m_dx[0]; |
|
|
const double w62 = -0.052831216351296779436*m_dx[0]; |
|
|
const double w63 = -0.052831216351296779436*m_dx[1]; |
|
|
const double w64 = 0.19716878364870322056*m_dx[1]; |
|
|
const double w65 = 0.052831216351296779436*m_dx[1]; |
|
|
const double w66 = 0.19716878364870322056*m_dx[0]; |
|
|
const double w67 = 0.052831216351296779436*m_dx[0]; |
|
|
const double w68 = -0.5*m_dx[1]; |
|
|
const double w69 = -0.5*m_dx[0]; |
|
|
const double w70 = 0.5*m_dx[1]; |
|
|
const double w71 = 0.5*m_dx[0]; |
|
|
const double w72 = 0.1555021169820365539*m_dx[0]*m_dx[1]; |
|
|
const double w73 = 0.041666666666666666667*m_dx[0]*m_dx[1]; |
|
|
const double w74 = 0.01116454968463011277*m_dx[0]*m_dx[1]; |
|
|
const double w75 = 0.25*m_dx[0]*m_dx[1]; |
|
2012 |
|
|
|
rhs.requireWrite(); |
|
|
#pragma omp parallel |
|
|
{ |
|
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
for (index_t k0=0; k0<m_NE[0]; ++k0) { |
|
|
bool add_EM_S=false; |
|
|
bool add_EM_F=false; |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = k0 + m_NE[0]*k1; |
|
|
/////////////// |
|
|
// process A // |
|
|
/////////////// |
|
|
if (!A.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
|
|
if (A.actsExpanded()) { |
|
|
const double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; |
|
|
const double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; |
|
|
const double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; |
|
|
const double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; |
|
|
const double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; |
|
|
const double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; |
|
|
const double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; |
|
|
const double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; |
|
|
const double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; |
|
|
const double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; |
|
|
const double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; |
|
|
const double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; |
|
|
const double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; |
|
|
const double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; |
|
|
const double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; |
|
|
const double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; |
|
|
const double tmp0_0 = A_01_0 + A_01_3; |
|
|
const double tmp1_0 = A_00_0 + A_00_1; |
|
|
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
|
|
const double tmp3_0 = A_00_2 + A_00_3; |
|
|
const double tmp4_0 = A_10_1 + A_10_2; |
|
|
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
|
|
const double tmp6_0 = A_01_3 + A_10_0; |
|
|
const double tmp7_0 = A_01_0 + A_10_3; |
|
|
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
|
|
const double tmp9_0 = A_01_0 + A_10_0; |
|
|
const double tmp12_0 = A_11_0 + A_11_2; |
|
|
const double tmp10_0 = A_01_3 + A_10_3; |
|
|
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
|
|
const double tmp13_0 = A_01_2 + A_10_1; |
|
|
const double tmp11_0 = A_11_1 + A_11_3; |
|
|
const double tmp18_0 = A_01_1 + A_10_1; |
|
|
const double tmp15_0 = A_01_1 + A_10_2; |
|
|
const double tmp16_0 = A_10_0 + A_10_3; |
|
|
const double tmp17_0 = A_01_1 + A_01_2; |
|
|
const double tmp19_0 = A_01_2 + A_10_2; |
|
|
const double tmp0_1 = A_10_3*w8; |
|
|
const double tmp1_1 = tmp0_0*w1; |
|
|
const double tmp2_1 = A_01_1*w4; |
|
|
const double tmp3_1 = tmp1_0*w0; |
|
|
const double tmp4_1 = A_01_2*w7; |
|
|
const double tmp5_1 = tmp2_0*w3; |
|
|
const double tmp6_1 = tmp3_0*w6; |
|
|
const double tmp7_1 = A_10_0*w2; |
|
|
const double tmp8_1 = tmp4_0*w5; |
|
|
const double tmp9_1 = tmp2_0*w10; |
|
|
const double tmp14_1 = A_10_0*w8; |
|
|
const double tmp23_1 = tmp3_0*w14; |
|
|
const double tmp35_1 = A_01_0*w8; |
|
|
const double tmp54_1 = tmp13_0*w8; |
|
|
const double tmp20_1 = tmp9_0*w4; |
|
|
const double tmp25_1 = tmp12_0*w12; |
|
|
const double tmp44_1 = tmp7_0*w7; |
|
|
const double tmp26_1 = tmp10_0*w4; |
|
|
const double tmp52_1 = tmp18_0*w8; |
|
|
const double tmp48_1 = A_10_1*w7; |
|
|
const double tmp46_1 = A_01_3*w8; |
|
|
const double tmp50_1 = A_01_0*w2; |
|
|
const double tmp56_1 = tmp19_0*w8; |
|
|
const double tmp19_1 = A_10_3*w2; |
|
|
const double tmp47_1 = A_10_2*w4; |
|
|
const double tmp16_1 = tmp3_0*w0; |
|
|
const double tmp18_1 = tmp1_0*w6; |
|
|
const double tmp31_1 = tmp11_0*w12; |
|
|
const double tmp55_1 = tmp15_0*w2; |
|
|
const double tmp39_1 = A_10_2*w7; |
|
|
const double tmp11_1 = tmp6_0*w7; |
|
|
const double tmp40_1 = tmp11_0*w17; |
|
|
const double tmp34_1 = tmp15_0*w8; |
|
|
const double tmp33_1 = tmp14_0*w5; |
|
|
const double tmp24_1 = tmp11_0*w13; |
|
|
const double tmp43_1 = tmp17_0*w5; |
|
|
const double tmp15_1 = A_01_2*w4; |
|
|
const double tmp53_1 = tmp19_0*w2; |
|
|
const double tmp27_1 = tmp3_0*w11; |
|
|
const double tmp32_1 = tmp13_0*w2; |
|
|
const double tmp10_1 = tmp5_0*w9; |
|
|
const double tmp37_1 = A_10_1*w4; |
|
|
const double tmp38_1 = tmp5_0*w15; |
|
|
const double tmp17_1 = A_01_1*w7; |
|
|
const double tmp12_1 = tmp7_0*w4; |
|
|
const double tmp22_1 = tmp10_0*w7; |
|
|
const double tmp57_1 = tmp18_0*w2; |
|
|
const double tmp28_1 = tmp9_0*w7; |
|
|
const double tmp29_1 = tmp1_0*w14; |
|
|
const double tmp51_1 = tmp11_0*w16; |
|
|
const double tmp42_1 = tmp12_0*w16; |
|
|
const double tmp49_1 = tmp12_0*w17; |
|
|
const double tmp21_1 = tmp1_0*w11; |
|
|
const double tmp45_1 = tmp6_0*w4; |
|
|
const double tmp13_1 = tmp8_0*w1; |
|
|
const double tmp36_1 = tmp16_0*w1; |
|
|
const double tmp41_1 = A_01_3*w2; |
|
|
const double tmp30_1 = tmp12_0*w13; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
} else { // constant data |
|
|
const double A_00 = A_p[INDEX2(0,0,2)]; |
|
|
const double A_10 = A_p[INDEX2(1,0,2)]; |
|
|
const double A_01 = A_p[INDEX2(0,1,2)]; |
|
|
const double A_11 = A_p[INDEX2(1,1,2)]; |
|
|
const double tmp0_0 = A_01 + A_10; |
|
|
const double tmp0_1 = A_00*w18; |
|
|
const double tmp1_1 = A_01*w19; |
|
|
const double tmp2_1 = A_10*w20; |
|
|
const double tmp3_1 = A_11*w21; |
|
|
const double tmp4_1 = A_00*w22; |
|
|
const double tmp5_1 = tmp0_0*w19; |
|
|
const double tmp6_1 = A_11*w23; |
|
|
const double tmp7_1 = A_11*w25; |
|
|
const double tmp8_1 = A_00*w24; |
|
|
const double tmp9_1 = tmp0_0*w20; |
|
|
const double tmp10_1 = A_01*w20; |
|
|
const double tmp11_1 = A_11*w27; |
|
|
const double tmp12_1 = A_00*w26; |
|
|
const double tmp13_1 = A_10*w19; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process B // |
|
|
/////////////// |
|
|
if (!B.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
|
|
if (B.actsExpanded()) { |
|
|
const double B_0_0 = B_p[INDEX2(0,0,2)]; |
|
|
const double B_1_0 = B_p[INDEX2(1,0,2)]; |
|
|
const double B_0_1 = B_p[INDEX2(0,1,2)]; |
|
|
const double B_1_1 = B_p[INDEX2(1,1,2)]; |
|
|
const double B_0_2 = B_p[INDEX2(0,2,2)]; |
|
|
const double B_1_2 = B_p[INDEX2(1,2,2)]; |
|
|
const double B_0_3 = B_p[INDEX2(0,3,2)]; |
|
|
const double B_1_3 = B_p[INDEX2(1,3,2)]; |
|
|
const double tmp0_0 = B_1_0 + B_1_1; |
|
|
const double tmp1_0 = B_1_2 + B_1_3; |
|
|
const double tmp2_0 = B_0_1 + B_0_3; |
|
|
const double tmp3_0 = B_0_0 + B_0_2; |
|
|
const double tmp63_1 = B_1_1*w42; |
|
|
const double tmp79_1 = B_1_1*w40; |
|
|
const double tmp37_1 = tmp3_0*w35; |
|
|
const double tmp8_1 = tmp0_0*w32; |
|
|
const double tmp71_1 = B_0_1*w34; |
|
|
const double tmp19_1 = B_0_3*w31; |
|
|
const double tmp15_1 = B_0_3*w34; |
|
|
const double tmp9_1 = tmp3_0*w34; |
|
|
const double tmp35_1 = B_1_0*w36; |
|
|
const double tmp66_1 = B_0_3*w28; |
|
|
const double tmp28_1 = B_1_0*w42; |
|
|
const double tmp22_1 = B_1_0*w40; |
|
|
const double tmp16_1 = B_1_2*w29; |
|
|
const double tmp6_1 = tmp2_0*w35; |
|
|
const double tmp55_1 = B_1_3*w40; |
|
|
const double tmp50_1 = B_1_3*w42; |
|
|
const double tmp7_1 = tmp1_0*w29; |
|
|
const double tmp1_1 = tmp1_0*w32; |
|
|
const double tmp57_1 = B_0_3*w30; |
|
|
const double tmp18_1 = B_1_1*w32; |
|
|
const double tmp53_1 = B_1_0*w41; |
|
|
const double tmp61_1 = B_1_3*w36; |
|
|
const double tmp27_1 = B_0_3*w38; |
|
|
const double tmp64_1 = B_0_2*w30; |
|
|
const double tmp76_1 = B_0_1*w38; |
|
|
const double tmp39_1 = tmp2_0*w34; |
|
|
const double tmp62_1 = B_0_1*w31; |
|
|
const double tmp56_1 = B_0_0*w31; |
|
|
const double tmp49_1 = B_1_1*w36; |
|
|
const double tmp2_1 = B_0_2*w31; |
|
|
const double tmp23_1 = B_0_2*w33; |
|
|
const double tmp38_1 = B_1_1*w43; |
|
|
const double tmp74_1 = B_1_2*w41; |
|
|
const double tmp43_1 = B_1_1*w41; |
|
|
const double tmp58_1 = B_0_2*w28; |
|
|
const double tmp67_1 = B_0_0*w33; |
|
|
const double tmp33_1 = tmp0_0*w39; |
|
|
const double tmp4_1 = B_0_0*w28; |
|
|
const double tmp20_1 = B_0_0*w30; |
|
|
const double tmp13_1 = B_0_2*w38; |
|
|
const double tmp65_1 = B_1_2*w43; |
|
|
const double tmp0_1 = tmp0_0*w29; |
|
|
const double tmp41_1 = tmp3_0*w33; |
|
|
const double tmp73_1 = B_0_2*w37; |
|
|
const double tmp69_1 = B_0_0*w38; |
|
|
const double tmp48_1 = B_1_2*w39; |
|
|
const double tmp59_1 = B_0_1*w33; |
|
|
const double tmp17_1 = B_1_3*w41; |
|
|
const double tmp5_1 = B_0_3*w33; |
|
|
const double tmp3_1 = B_0_1*w30; |
|
|
const double tmp21_1 = B_0_1*w28; |
|
|
const double tmp42_1 = B_1_0*w29; |
|
|
const double tmp54_1 = B_1_2*w32; |
|
|
const double tmp60_1 = B_1_0*w39; |
|
|
const double tmp32_1 = tmp1_0*w36; |
|
|
const double tmp10_1 = B_0_1*w37; |
|
|
const double tmp14_1 = B_0_0*w35; |
|
|
const double tmp29_1 = B_0_1*w35; |
|
|
const double tmp26_1 = B_1_2*w36; |
|
|
const double tmp30_1 = B_1_3*w43; |
|
|
const double tmp70_1 = B_0_2*w35; |
|
|
const double tmp34_1 = B_1_3*w39; |
|
|
const double tmp51_1 = B_1_0*w43; |
|
|
const double tmp31_1 = B_0_2*w34; |
|
|
const double tmp45_1 = tmp3_0*w28; |
|
|
const double tmp11_1 = tmp1_0*w39; |
|
|
const double tmp52_1 = B_1_1*w29; |
|
|
const double tmp44_1 = B_1_3*w32; |
|
|
const double tmp25_1 = B_1_1*w39; |
|
|
const double tmp47_1 = tmp2_0*w33; |
|
|
const double tmp72_1 = B_1_3*w29; |
|
|
const double tmp40_1 = tmp2_0*w28; |
|
|
const double tmp46_1 = B_1_2*w40; |
|
|
const double tmp36_1 = B_1_2*w42; |
|
|
const double tmp24_1 = B_0_0*w37; |
|
|
const double tmp77_1 = B_0_3*w35; |
|
|
const double tmp68_1 = B_0_3*w37; |
|
|
const double tmp78_1 = B_0_0*w34; |
|
|
const double tmp12_1 = tmp0_0*w36; |
|
|
const double tmp75_1 = B_1_0*w32; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
} else { // constant data |
|
|
const double B_0 = B_p[0]; |
|
|
const double B_1 = B_p[1]; |
|
|
const double tmp0_1 = B_0*w44; |
|
|
const double tmp1_1 = B_1*w45; |
|
|
const double tmp2_1 = B_0*w46; |
|
|
const double tmp3_1 = B_0*w47; |
|
|
const double tmp4_1 = B_1*w48; |
|
|
const double tmp5_1 = B_1*w49; |
|
|
const double tmp6_1 = B_1*w50; |
|
|
const double tmp7_1 = B_0*w51; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process C // |
|
|
/////////////// |
|
|
if (!C.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
|
|
if (C.actsExpanded()) { |
|
|
const double C_0_0 = C_p[INDEX2(0,0,2)]; |
|
|
const double C_1_0 = C_p[INDEX2(1,0,2)]; |
|
|
const double C_0_1 = C_p[INDEX2(0,1,2)]; |
|
|
const double C_1_1 = C_p[INDEX2(1,1,2)]; |
|
|
const double C_0_2 = C_p[INDEX2(0,2,2)]; |
|
|
const double C_1_2 = C_p[INDEX2(1,2,2)]; |
|
|
const double C_0_3 = C_p[INDEX2(0,3,2)]; |
|
|
const double C_1_3 = C_p[INDEX2(1,3,2)]; |
|
|
const double tmp0_0 = C_1_0 + C_1_1; |
|
|
const double tmp1_0 = C_1_2 + C_1_3; |
|
|
const double tmp2_0 = C_0_1 + C_0_3; |
|
|
const double tmp3_0 = C_0_0 + C_0_2; |
|
|
const double tmp64_1 = C_0_2*w30; |
|
|
const double tmp14_1 = C_0_2*w28; |
|
|
const double tmp19_1 = C_0_3*w31; |
|
|
const double tmp22_1 = C_1_0*w40; |
|
|
const double tmp37_1 = tmp3_0*w35; |
|
|
const double tmp29_1 = C_0_1*w35; |
|
|
const double tmp73_1 = C_0_2*w37; |
|
|
const double tmp74_1 = C_1_2*w41; |
|
|
const double tmp52_1 = C_1_3*w39; |
|
|
const double tmp25_1 = C_1_1*w39; |
|
|
const double tmp62_1 = C_0_1*w31; |
|
|
const double tmp79_1 = C_1_1*w40; |
|
|
const double tmp43_1 = C_1_1*w36; |
|
|
const double tmp27_1 = C_0_3*w38; |
|
|
const double tmp28_1 = C_1_0*w42; |
|
|
const double tmp63_1 = C_1_1*w42; |
|
|
const double tmp59_1 = C_0_3*w34; |
|
|
const double tmp72_1 = C_1_3*w29; |
|
|
const double tmp40_1 = tmp2_0*w35; |
|
|
const double tmp13_1 = C_0_3*w30; |
|
|
const double tmp51_1 = C_1_2*w40; |
|
|
const double tmp54_1 = C_1_2*w42; |
|
|
const double tmp12_1 = C_0_0*w31; |
|
|
const double tmp2_1 = tmp1_0*w32; |
|
|
const double tmp68_1 = C_0_2*w31; |
|
|
const double tmp75_1 = C_1_0*w32; |
|
|
const double tmp49_1 = C_1_1*w41; |
|
|
const double tmp4_1 = C_0_2*w35; |
|
|
const double tmp66_1 = C_0_3*w28; |
|
|
const double tmp56_1 = C_0_1*w37; |
|
|
const double tmp5_1 = C_0_1*w34; |
|
|
const double tmp38_1 = tmp2_0*w34; |
|
|
const double tmp76_1 = C_0_1*w38; |
|
|
const double tmp21_1 = C_0_1*w28; |
|
|
const double tmp69_1 = C_0_1*w30; |
|
|
const double tmp53_1 = C_1_0*w36; |
|
|
const double tmp42_1 = C_1_2*w39; |
|
|
const double tmp32_1 = tmp1_0*w29; |
|
|
const double tmp45_1 = C_1_0*w43; |
|
|
const double tmp33_1 = tmp0_0*w32; |
|
|
const double tmp35_1 = C_1_0*w41; |
|
|
const double tmp26_1 = C_1_2*w36; |
|
|
const double tmp67_1 = C_0_0*w33; |
|
|
const double tmp31_1 = C_0_2*w34; |
|
|
const double tmp20_1 = C_0_0*w30; |
|
|
const double tmp70_1 = C_0_0*w28; |
|
|
const double tmp8_1 = tmp0_0*w39; |
|
|
const double tmp30_1 = C_1_3*w43; |
|
|
const double tmp0_1 = tmp0_0*w29; |
|
|
const double tmp17_1 = C_1_3*w41; |
|
|
const double tmp58_1 = C_0_0*w35; |
|
|
const double tmp9_1 = tmp3_0*w33; |
|
|
const double tmp61_1 = C_1_3*w36; |
|
|
const double tmp41_1 = tmp3_0*w34; |
|
|
const double tmp50_1 = C_1_3*w32; |
|
|
const double tmp18_1 = C_1_1*w32; |
|
|
const double tmp6_1 = tmp1_0*w36; |
|
|
const double tmp3_1 = C_0_0*w38; |
|
|
const double tmp34_1 = C_1_1*w29; |
|
|
const double tmp77_1 = C_0_3*w35; |
|
|
const double tmp65_1 = C_1_2*w43; |
|
|
const double tmp71_1 = C_0_3*w33; |
|
|
const double tmp55_1 = C_1_1*w43; |
|
|
const double tmp46_1 = tmp3_0*w28; |
|
|
const double tmp24_1 = C_0_0*w37; |
|
|
const double tmp10_1 = tmp1_0*w39; |
|
|
const double tmp48_1 = C_1_0*w29; |
|
|
const double tmp15_1 = C_0_1*w33; |
|
|
const double tmp36_1 = C_1_2*w32; |
|
|
const double tmp60_1 = C_1_0*w39; |
|
|
const double tmp47_1 = tmp2_0*w33; |
|
|
const double tmp16_1 = C_1_2*w29; |
|
|
const double tmp1_1 = C_0_3*w37; |
|
|
const double tmp7_1 = tmp2_0*w28; |
|
|
const double tmp39_1 = C_1_3*w40; |
|
|
const double tmp44_1 = C_1_3*w42; |
|
|
const double tmp57_1 = C_0_2*w38; |
|
|
const double tmp78_1 = C_0_0*w34; |
|
|
const double tmp11_1 = tmp0_0*w36; |
|
|
const double tmp23_1 = C_0_2*w33; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
} else { // constant data |
|
|
const double C_0 = C_p[0]; |
|
|
const double C_1 = C_p[1]; |
|
|
const double tmp0_1 = C_0*w47; |
|
|
const double tmp1_1 = C_1*w45; |
|
|
const double tmp2_1 = C_1*w48; |
|
|
const double tmp3_1 = C_0*w51; |
|
|
const double tmp4_1 = C_0*w44; |
|
|
const double tmp5_1 = C_1*w49; |
|
|
const double tmp6_1 = C_1*w50; |
|
|
const double tmp7_1 = C_0*w46; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process D // |
|
|
/////////////// |
|
|
if (!D.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
|
|
if (D.actsExpanded()) { |
|
|
const double D_0 = D_p[0]; |
|
|
const double D_1 = D_p[1]; |
|
|
const double D_2 = D_p[2]; |
|
|
const double D_3 = D_p[3]; |
|
|
const double tmp0_0 = D_0 + D_1; |
|
|
const double tmp1_0 = D_2 + D_3; |
|
|
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
|
|
const double tmp3_0 = D_1 + D_2; |
|
|
const double tmp4_0 = D_1 + D_3; |
|
|
const double tmp5_0 = D_0 + D_2; |
|
|
const double tmp6_0 = D_0 + D_3; |
|
|
const double tmp0_1 = tmp0_0*w52; |
|
|
const double tmp1_1 = tmp1_0*w53; |
|
|
const double tmp2_1 = tmp2_0*w54; |
|
|
const double tmp3_1 = tmp1_0*w52; |
|
|
const double tmp4_1 = tmp0_0*w53; |
|
|
const double tmp5_1 = tmp3_0*w54; |
|
|
const double tmp6_1 = D_0*w55; |
|
|
const double tmp7_1 = D_3*w56; |
|
|
const double tmp8_1 = D_3*w55; |
|
|
const double tmp9_1 = D_0*w56; |
|
|
const double tmp10_1 = tmp4_0*w52; |
|
|
const double tmp11_1 = tmp5_0*w53; |
|
|
const double tmp12_1 = tmp5_0*w52; |
|
|
const double tmp13_1 = tmp4_0*w53; |
|
|
const double tmp14_1 = tmp6_0*w54; |
|
|
const double tmp15_1 = D_2*w55; |
|
|
const double tmp16_1 = D_1*w56; |
|
|
const double tmp17_1 = D_1*w55; |
|
|
const double tmp18_1 = D_2*w56; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
|
|
} else { // constant data |
|
|
const double tmp0_1 = D_p[0]*w57; |
|
|
const double tmp1_1 = D_p[0]*w58; |
|
|
const double tmp2_1 = D_p[0]*w59; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp2_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process X // |
|
|
/////////////// |
|
|
if (!X.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
|
|
if (X.actsExpanded()) { |
|
|
const double X_0_0 = X_p[INDEX2(0,0,2)]; |
|
|
const double X_1_0 = X_p[INDEX2(1,0,2)]; |
|
|
const double X_0_1 = X_p[INDEX2(0,1,2)]; |
|
|
const double X_1_1 = X_p[INDEX2(1,1,2)]; |
|
|
const double X_0_2 = X_p[INDEX2(0,2,2)]; |
|
|
const double X_1_2 = X_p[INDEX2(1,2,2)]; |
|
|
const double X_0_3 = X_p[INDEX2(0,3,2)]; |
|
|
const double X_1_3 = X_p[INDEX2(1,3,2)]; |
|
|
const double tmp0_0 = X_0_2 + X_0_3; |
|
|
const double tmp1_0 = X_1_1 + X_1_3; |
|
|
const double tmp2_0 = X_1_0 + X_1_2; |
|
|
const double tmp3_0 = X_0_0 + X_0_1; |
|
|
const double tmp0_1 = tmp0_0*w63; |
|
|
const double tmp1_1 = tmp1_0*w62; |
|
|
const double tmp2_1 = tmp2_0*w61; |
|
|
const double tmp3_1 = tmp3_0*w60; |
|
|
const double tmp4_1 = tmp0_0*w65; |
|
|
const double tmp5_1 = tmp3_0*w64; |
|
|
const double tmp6_1 = tmp2_0*w62; |
|
|
const double tmp7_1 = tmp1_0*w61; |
|
|
const double tmp8_1 = tmp2_0*w66; |
|
|
const double tmp9_1 = tmp3_0*w63; |
|
|
const double tmp10_1 = tmp0_0*w60; |
|
|
const double tmp11_1 = tmp1_0*w67; |
|
|
const double tmp12_1 = tmp1_0*w66; |
|
|
const double tmp13_1 = tmp3_0*w65; |
|
|
const double tmp14_1 = tmp0_0*w64; |
|
|
const double tmp15_1 = tmp2_0*w67; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
|
|
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
|
|
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
} else { // constant data |
|
|
const double tmp0_1 = X_p[1]*w69; |
|
|
const double tmp1_1 = X_p[0]*w68; |
|
|
const double tmp2_1 = X_p[0]*w70; |
|
|
const double tmp3_1 = X_p[1]*w71; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1; |
|
|
EM_F[1]+=tmp0_1 + tmp2_1; |
|
|
EM_F[2]+=tmp1_1 + tmp3_1; |
|
|
EM_F[3]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process Y // |
|
|
/////////////// |
|
|
if (!Y.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
|
|
if (Y.actsExpanded()) { |
|
|
const double Y_0 = Y_p[0]; |
|
|
const double Y_1 = Y_p[1]; |
|
|
const double Y_2 = Y_p[2]; |
|
|
const double Y_3 = Y_p[3]; |
|
|
const double tmp0_0 = Y_1 + Y_2; |
|
|
const double tmp1_0 = Y_0 + Y_3; |
|
|
const double tmp0_1 = Y_0*w72; |
|
|
const double tmp1_1 = tmp0_0*w73; |
|
|
const double tmp2_1 = Y_3*w74; |
|
|
const double tmp3_1 = Y_1*w72; |
|
|
const double tmp4_1 = tmp1_0*w73; |
|
|
const double tmp5_1 = Y_2*w74; |
|
|
const double tmp6_1 = Y_2*w72; |
|
|
const double tmp7_1 = Y_1*w74; |
|
|
const double tmp8_1 = Y_3*w72; |
|
|
const double tmp9_1 = Y_0*w74; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; |
|
|
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; |
|
|
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; |
|
|
} else { // constant data |
|
|
const double tmp0_1 = Y_p[0]*w75; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[1]+=tmp0_1; |
|
|
EM_F[2]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
} |
|
2013 |
|
|
2014 |
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
namespace |
2015 |
const index_t firstNode=m_NN[0]*k1+k0; |
{ |
2016 |
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
// Calculates a guassian blur colvolution matrix for 2D |
2017 |
} // end k0 loop |
// See wiki article on the subject |
2018 |
} // end k1 loop |
double* get2DGauss(unsigned radius, double sigma) |
2019 |
} // end of colouring |
{ |
2020 |
} // end of parallel region |
double* arr=new double[(radius*2+1)*(radius*2+1)]; |
2021 |
|
double common=M_1_PI*0.5*1/(sigma*sigma); |
2022 |
|
double total=0; |
2023 |
|
int r=static_cast<int>(radius); |
2024 |
|
for (int y=-r;y<=r;++y) |
2025 |
|
{ |
2026 |
|
for (int x=-r;x<=r;++x) |
2027 |
|
{ |
2028 |
|
arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma)); |
2029 |
|
total+=arr[(x+r)+(y+r)*(r*2+1)]; |
2030 |
|
} |
2031 |
|
} |
2032 |
|
double invtotal=1/total; |
2033 |
|
for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p) |
2034 |
|
{ |
2035 |
|
arr[p]*=invtotal; |
2036 |
|
} |
2037 |
|
return arr; |
2038 |
|
} |
2039 |
|
|
2040 |
|
// applies conv to source to get a point. |
2041 |
|
// (xp, yp) are the coords in the source matrix not the destination matrix |
2042 |
|
double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width) |
2043 |
|
{ |
2044 |
|
size_t bx=xp-radius, by=yp-radius; |
2045 |
|
size_t sbase=bx+by*width; |
2046 |
|
double result=0; |
2047 |
|
for (int y=0;y<2*radius+1;++y) |
2048 |
|
{ |
2049 |
|
for (int x=0;x<2*radius+1;++x) |
2050 |
|
{ |
2051 |
|
result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width]; |
2052 |
|
} |
2053 |
|
} |
2054 |
|
return result; |
2055 |
|
} |
2056 |
} |
} |
2057 |
|
|
|
//protected |
|
|
void Rectangle::assemblePDESingleReduced(Paso_SystemMatrix* mat, |
|
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y) const |
|
|
{ |
|
|
const double w0 = -.25*m_dx[1]/m_dx[0]; |
|
|
const double w1 = .25; |
|
|
const double w2 = -.25; |
|
|
const double w3 = .25*m_dx[0]/m_dx[1]; |
|
|
const double w4 = -.25*m_dx[0]/m_dx[1]; |
|
|
const double w5 = .25*m_dx[1]/m_dx[0]; |
|
|
const double w6 = -.125*m_dx[1]; |
|
|
const double w7 = -.125*m_dx[0]; |
|
|
const double w8 = .125*m_dx[1]; |
|
|
const double w9 = .125*m_dx[0]; |
|
|
const double w10 = .0625*m_dx[0]*m_dx[1]; |
|
|
const double w11 = -.5*m_dx[1]; |
|
|
const double w12 = -.5*m_dx[0]; |
|
|
const double w13 = .5*m_dx[1]; |
|
|
const double w14 = .5*m_dx[0]; |
|
|
const double w15 = .25*m_dx[0]*m_dx[1]; |
|
2058 |
|
|
2059 |
rhs.requireWrite(); |
/* This is a wrapper for filtered (and non-filtered) randoms |
2060 |
#pragma omp parallel |
* For detailed doco see randomFillWorker |
2061 |
|
*/ |
2062 |
|
escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape, |
2063 |
|
const escript::FunctionSpace& what, |
2064 |
|
long seed, const boost::python::tuple& filter) const |
2065 |
|
{ |
2066 |
|
int numvals=escript::DataTypes::noValues(shape); |
2067 |
|
if (len(filter)>0 && (numvals!=1)) |
2068 |
{ |
{ |
2069 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
throw RipleyException("Ripley only supports filters for scalar data."); |
2070 |
#pragma omp for |
} |
2071 |
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
escript::Data res=randomFillWorker(shape, seed, filter); |
2072 |
for (index_t k0=0; k0<m_NE[0]; ++k0) { |
if (res.getFunctionSpace()!=what) |
2073 |
bool add_EM_S=false; |
{ |
2074 |
bool add_EM_F=false; |
escript::Data r=escript::Data(res, what); |
2075 |
vector<double> EM_S(4*4, 0); |
return r; |
2076 |
vector<double> EM_F(4, 0); |
} |
2077 |
const index_t e = k0 + m_NE[0]*k1; |
return res; |
|
/////////////// |
|
|
// process A // |
|
|
/////////////// |
|
|
if (!A.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
|
|
const double A_00 = A_p[INDEX2(0,0,2)]; |
|
|
const double A_10 = A_p[INDEX2(1,0,2)]; |
|
|
const double A_01 = A_p[INDEX2(0,1,2)]; |
|
|
const double A_11 = A_p[INDEX2(1,1,2)]; |
|
|
const double tmp0_0 = A_01 + A_10; |
|
|
const double tmp0_1 = A_11*w3; |
|
|
const double tmp1_1 = A_00*w0; |
|
|
const double tmp2_1 = A_01*w1; |
|
|
const double tmp3_1 = A_10*w2; |
|
|
const double tmp4_1 = tmp0_0*w1; |
|
|
const double tmp5_1 = A_11*w4; |
|
|
const double tmp6_1 = A_00*w5; |
|
|
const double tmp7_1 = tmp0_0*w2; |
|
|
const double tmp8_1 = A_10*w1; |
|
|
const double tmp9_1 = A_01*w2; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp4_1 + tmp6_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp4_1 + tmp6_1; |
|
|
} |
|
|
/////////////// |
|
|
// process B // |
|
|
/////////////// |
|
|
if (!B.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
|
|
const double tmp2_1 = B_p[0]*w8; |
|
|
const double tmp0_1 = B_p[0]*w6; |
|
|
const double tmp3_1 = B_p[1]*w9; |
|
|
const double tmp1_1 = B_p[1]*w7; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
/////////////// |
|
|
// process C // |
|
|
/////////////// |
|
|
if (!C.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
|
|
const double tmp1_1 = C_p[1]*w7; |
|
|
const double tmp0_1 = C_p[0]*w8; |
|
|
const double tmp3_1 = C_p[0]*w6; |
|
|
const double tmp2_1 = C_p[1]*w9; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp2_1; |
|
|
} |
|
|
/////////////// |
|
|
// process D // |
|
|
/////////////// |
|
|
if (!D.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
|
|
const double tmp0_1 = D_p[0]*w10; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1; |
|
|
} |
|
|
/////////////// |
|
|
// process X // |
|
|
/////////////// |
|
|
if (!X.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
|
|
const double tmp0_1 = X_p[0]*w11; |
|
|
const double tmp2_1 = X_p[0]*w13; |
|
|
const double tmp1_1 = X_p[1]*w12; |
|
|
const double tmp3_1 = X_p[1]*w14; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1; |
|
|
EM_F[1]+=tmp1_1 + tmp2_1; |
|
|
EM_F[2]+=tmp0_1 + tmp3_1; |
|
|
EM_F[3]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
/////////////// |
|
|
// process Y // |
|
|
/////////////// |
|
|
if (!Y.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
|
|
const double tmp0_1 = Y_p[0]*w15; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[1]+=tmp0_1; |
|
|
EM_F[2]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
|
|
|
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
|
|
const index_t firstNode=m_NN[0]*k1+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} // end k0 loop |
|
|
} // end k1 loop |
|
|
} // end of colouring |
|
|
} // end of parallel region |
|
2078 |
} |
} |
2079 |
|
|
|
//protected |
|
|
void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, |
|
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y) const |
|
|
{ |
|
|
dim_t numEq, numComp; |
|
|
if (!mat) |
|
|
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); |
|
|
else { |
|
|
numEq=mat->logical_row_block_size; |
|
|
numComp=mat->logical_col_block_size; |
|
|
} |
|
|
|
|
|
const double w0 = -0.1555021169820365539*m_dx[1]/m_dx[0]; |
|
|
const double w1 = 0.041666666666666666667; |
|
|
const double w2 = -0.15550211698203655390; |
|
|
const double w3 = 0.041666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w4 = 0.15550211698203655390; |
|
|
const double w5 = -0.041666666666666666667; |
|
|
const double w6 = -0.01116454968463011277*m_dx[1]/m_dx[0]; |
|
|
const double w7 = 0.011164549684630112770; |
|
|
const double w8 = -0.011164549684630112770; |
|
|
const double w9 = -0.041666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w10 = -0.041666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w11 = 0.1555021169820365539*m_dx[1]/m_dx[0]; |
|
|
const double w12 = 0.1555021169820365539*m_dx[0]/m_dx[1]; |
|
|
const double w13 = 0.01116454968463011277*m_dx[0]/m_dx[1]; |
|
|
const double w14 = 0.01116454968463011277*m_dx[1]/m_dx[0]; |
|
|
const double w15 = 0.041666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w16 = -0.01116454968463011277*m_dx[0]/m_dx[1]; |
|
|
const double w17 = -0.1555021169820365539*m_dx[0]/m_dx[1]; |
|
|
const double w18 = -0.33333333333333333333*m_dx[1]/m_dx[0]; |
|
|
const double w19 = 0.25000000000000000000; |
|
|
const double w20 = -0.25000000000000000000; |
|
|
const double w21 = 0.16666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w22 = -0.16666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w23 = -0.16666666666666666667*m_dx[0]/m_dx[1]; |
|
|
const double w24 = 0.33333333333333333333*m_dx[1]/m_dx[0]; |
|
|
const double w25 = 0.33333333333333333333*m_dx[0]/m_dx[1]; |
|
|
const double w26 = 0.16666666666666666667*m_dx[1]/m_dx[0]; |
|
|
const double w27 = -0.33333333333333333333*m_dx[0]/m_dx[1]; |
|
|
const double w28 = -0.032861463941450536761*m_dx[1]; |
|
|
const double w29 = -0.032861463941450536761*m_dx[0]; |
|
|
const double w30 = -0.12264065304058601714*m_dx[1]; |
|
|
const double w31 = -0.0023593469594139828636*m_dx[1]; |
|
|
const double w32 = -0.008805202725216129906*m_dx[0]; |
|
|
const double w33 = -0.008805202725216129906*m_dx[1]; |
|
|
const double w34 = 0.032861463941450536761*m_dx[1]; |
|
|
const double w35 = 0.008805202725216129906*m_dx[1]; |
|
|
const double w36 = 0.008805202725216129906*m_dx[0]; |
|
|
const double w37 = 0.0023593469594139828636*m_dx[1]; |
|
|
const double w38 = 0.12264065304058601714*m_dx[1]; |
|
|
const double w39 = 0.032861463941450536761*m_dx[0]; |
|
|
const double w40 = -0.12264065304058601714*m_dx[0]; |
|
|
const double w41 = -0.0023593469594139828636*m_dx[0]; |
|
|
const double w42 = 0.0023593469594139828636*m_dx[0]; |
|
|
const double w43 = 0.12264065304058601714*m_dx[0]; |
|
|
const double w44 = -0.16666666666666666667*m_dx[1]; |
|
|
const double w45 = -0.083333333333333333333*m_dx[0]; |
|
|
const double w46 = 0.083333333333333333333*m_dx[1]; |
|
|
const double w47 = 0.16666666666666666667*m_dx[1]; |
|
|
const double w48 = 0.083333333333333333333*m_dx[0]; |
|
|
const double w49 = -0.16666666666666666667*m_dx[0]; |
|
|
const double w50 = 0.16666666666666666667*m_dx[0]; |
|
|
const double w51 = -0.083333333333333333333*m_dx[1]; |
|
|
const double w52 = 0.025917019497006092316*m_dx[0]*m_dx[1]; |
|
|
const double w53 = 0.0018607582807716854616*m_dx[0]*m_dx[1]; |
|
|
const double w54 = 0.0069444444444444444444*m_dx[0]*m_dx[1]; |
|
|
const double w55 = 0.09672363354357992482*m_dx[0]*m_dx[1]; |
|
|
const double w56 = 0.00049858867864229740201*m_dx[0]*m_dx[1]; |
|
|
const double w57 = 0.055555555555555555556*m_dx[0]*m_dx[1]; |
|
|
const double w58 = 0.027777777777777777778*m_dx[0]*m_dx[1]; |
|
|
const double w59 = 0.11111111111111111111*m_dx[0]*m_dx[1]; |
|
|
const double w60 = -0.19716878364870322056*m_dx[1]; |
|
|
const double w61 = -0.19716878364870322056*m_dx[0]; |
|
|
const double w62 = -0.052831216351296779436*m_dx[0]; |
|
|
const double w63 = -0.052831216351296779436*m_dx[1]; |
|
|
const double w64 = 0.19716878364870322056*m_dx[1]; |
|
|
const double w65 = 0.052831216351296779436*m_dx[1]; |
|
|
const double w66 = 0.19716878364870322056*m_dx[0]; |
|
|
const double w67 = 0.052831216351296779436*m_dx[0]; |
|
|
const double w68 = -0.5*m_dx[1]; |
|
|
const double w69 = -0.5*m_dx[0]; |
|
|
const double w70 = 0.5*m_dx[1]; |
|
|
const double w71 = 0.5*m_dx[0]; |
|
|
const double w72 = 0.1555021169820365539*m_dx[0]*m_dx[1]; |
|
|
const double w73 = 0.041666666666666666667*m_dx[0]*m_dx[1]; |
|
|
const double w74 = 0.01116454968463011277*m_dx[0]*m_dx[1]; |
|
|
const double w75 = 0.25*m_dx[0]*m_dx[1]; |
|
2080 |
|
|
2081 |
rhs.requireWrite(); |
/* This routine produces a Data object filled with smoothed random data. |
2082 |
#pragma omp parallel |
The dimensions of the rectangle being filled are internal[0] x internal[1] points. |
2083 |
{ |
A parameter radius gives the size of the stencil used for the smoothing. |
2084 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
A point on the left hand edge for example, will still require `radius` extra points to the left |
2085 |
#pragma omp for |
in order to complete the stencil. |
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
for (index_t k0=0; k0<m_NE[0]; ++k0) { |
|
|
bool add_EM_S=false; |
|
|
bool add_EM_F=false; |
|
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
|
|
vector<double> EM_F(4*numEq, 0); |
|
|
const index_t e = k0 + m_NE[0]*k1; |
|
|
/////////////// |
|
|
// process A // |
|
|
/////////////// |
|
|
if (!A.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
|
|
if (A.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; |
|
|
const double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; |
|
|
const double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; |
|
|
const double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; |
|
|
const double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; |
|
|
const double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; |
|
|
const double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; |
|
|
const double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; |
|
|
const double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; |
|
|
const double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; |
|
|
const double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; |
|
|
const double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; |
|
|
const double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; |
|
|
const double A_01_3 = A_p[INDEX5(k,0,m,1,3, numEq,2,numComp,2)]; |
|
|
const double A_10_3 = A_p[INDEX5(k,1,m,0,3, numEq,2,numComp,2)]; |
|
|
const double A_11_3 = A_p[INDEX5(k,1,m,1,3, numEq,2,numComp,2)]; |
|
|
const double tmp0_0 = A_01_0 + A_01_3; |
|
|
const double tmp1_0 = A_00_0 + A_00_1; |
|
|
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
|
|
const double tmp3_0 = A_00_2 + A_00_3; |
|
|
const double tmp4_0 = A_10_1 + A_10_2; |
|
|
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
|
|
const double tmp6_0 = A_01_3 + A_10_0; |
|
|
const double tmp7_0 = A_01_0 + A_10_3; |
|
|
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
|
|
const double tmp9_0 = A_01_0 + A_10_0; |
|
|
const double tmp10_0 = A_01_3 + A_10_3; |
|
|
const double tmp11_0 = A_11_1 + A_11_3; |
|
|
const double tmp12_0 = A_11_0 + A_11_2; |
|
|
const double tmp13_0 = A_01_2 + A_10_1; |
|
|
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
|
|
const double tmp15_0 = A_01_1 + A_10_2; |
|
|
const double tmp16_0 = A_10_0 + A_10_3; |
|
|
const double tmp17_0 = A_01_1 + A_01_2; |
|
|
const double tmp18_0 = A_01_1 + A_10_1; |
|
|
const double tmp19_0 = A_01_2 + A_10_2; |
|
|
const double tmp0_1 = A_10_3*w8; |
|
|
const double tmp1_1 = tmp0_0*w1; |
|
|
const double tmp2_1 = A_01_1*w4; |
|
|
const double tmp3_1 = tmp1_0*w0; |
|
|
const double tmp4_1 = A_01_2*w7; |
|
|
const double tmp5_1 = tmp2_0*w3; |
|
|
const double tmp6_1 = tmp3_0*w6; |
|
|
const double tmp7_1 = A_10_0*w2; |
|
|
const double tmp8_1 = tmp4_0*w5; |
|
|
const double tmp9_1 = tmp2_0*w10; |
|
|
const double tmp10_1 = tmp5_0*w9; |
|
|
const double tmp11_1 = tmp6_0*w7; |
|
|
const double tmp12_1 = tmp7_0*w4; |
|
|
const double tmp13_1 = tmp8_0*w1; |
|
|
const double tmp14_1 = A_10_0*w8; |
|
|
const double tmp15_1 = A_01_2*w4; |
|
|
const double tmp16_1 = tmp3_0*w0; |
|
|
const double tmp17_1 = A_01_1*w7; |
|
|
const double tmp18_1 = tmp1_0*w6; |
|
|
const double tmp19_1 = A_10_3*w2; |
|
|
const double tmp20_1 = tmp9_0*w4; |
|
|
const double tmp21_1 = tmp1_0*w11; |
|
|
const double tmp22_1 = tmp10_0*w7; |
|
|
const double tmp23_1 = tmp3_0*w14; |
|
|
const double tmp24_1 = tmp11_0*w13; |
|
|
const double tmp25_1 = tmp12_0*w12; |
|
|
const double tmp26_1 = tmp10_0*w4; |
|
|
const double tmp27_1 = tmp3_0*w11; |
|
|
const double tmp28_1 = tmp9_0*w7; |
|
|
const double tmp29_1 = tmp1_0*w14; |
|
|
const double tmp30_1 = tmp12_0*w13; |
|
|
const double tmp31_1 = tmp11_0*w12; |
|
|
const double tmp32_1 = tmp13_0*w2; |
|
|
const double tmp33_1 = tmp14_0*w5; |
|
|
const double tmp34_1 = tmp15_0*w8; |
|
|
const double tmp35_1 = A_01_0*w8; |
|
|
const double tmp36_1 = tmp16_0*w1; |
|
|
const double tmp37_1 = A_10_1*w4; |
|
|
const double tmp38_1 = tmp5_0*w15; |
|
|
const double tmp39_1 = A_10_2*w7; |
|
|
const double tmp40_1 = tmp11_0*w17; |
|
|
const double tmp41_1 = A_01_3*w2; |
|
|
const double tmp42_1 = tmp12_0*w16; |
|
|
const double tmp43_1 = tmp17_0*w5; |
|
|
const double tmp44_1 = tmp7_0*w7; |
|
|
const double tmp45_1 = tmp6_0*w4; |
|
|
const double tmp46_1 = A_01_3*w8; |
|
|
const double tmp47_1 = A_10_2*w4; |
|
|
const double tmp48_1 = A_10_1*w7; |
|
|
const double tmp49_1 = tmp12_0*w17; |
|
|
const double tmp50_1 = A_01_0*w2; |
|
|
const double tmp51_1 = tmp11_0*w16; |
|
|
const double tmp52_1 = tmp18_0*w8; |
|
|
const double tmp53_1 = tmp19_0*w2; |
|
|
const double tmp54_1 = tmp13_0*w8; |
|
|
const double tmp55_1 = tmp15_0*w2; |
|
|
const double tmp56_1 = tmp19_0*w8; |
|
|
const double tmp57_1 = tmp18_0*w2; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
} |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; |
|
|
const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; |
|
|
const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; |
|
|
const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; |
|
|
const double tmp0_0 = A_01 + A_10; |
|
|
const double tmp0_1 = A_00*w18; |
|
|
const double tmp1_1 = A_01*w19; |
|
|
const double tmp2_1 = A_10*w20; |
|
|
const double tmp3_1 = A_11*w21; |
|
|
const double tmp4_1 = A_00*w22; |
|
|
const double tmp5_1 = tmp0_0*w19; |
|
|
const double tmp6_1 = A_11*w23; |
|
|
const double tmp7_1 = A_11*w25; |
|
|
const double tmp8_1 = A_00*w24; |
|
|
const double tmp9_1 = tmp0_0*w20; |
|
|
const double tmp10_1 = A_01*w20; |
|
|
const double tmp11_1 = A_11*w27; |
|
|
const double tmp12_1 = A_00*w26; |
|
|
const double tmp13_1 = A_10*w19; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process B // |
|
|
/////////////// |
|
|
if (!B.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
|
|
if (B.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)]; |
|
|
const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)]; |
|
|
const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)]; |
|
|
const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)]; |
|
|
const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)]; |
|
|
const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)]; |
|
|
const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)]; |
|
|
const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)]; |
|
|
const double tmp0_0 = B_1_0 + B_1_1; |
|
|
const double tmp1_0 = B_1_2 + B_1_3; |
|
|
const double tmp2_0 = B_0_1 + B_0_3; |
|
|
const double tmp3_0 = B_0_0 + B_0_2; |
|
|
const double tmp63_1 = B_1_1*w42; |
|
|
const double tmp79_1 = B_1_1*w40; |
|
|
const double tmp37_1 = tmp3_0*w35; |
|
|
const double tmp8_1 = tmp0_0*w32; |
|
|
const double tmp71_1 = B_0_1*w34; |
|
|
const double tmp19_1 = B_0_3*w31; |
|
|
const double tmp15_1 = B_0_3*w34; |
|
|
const double tmp9_1 = tmp3_0*w34; |
|
|
const double tmp35_1 = B_1_0*w36; |
|
|
const double tmp66_1 = B_0_3*w28; |
|
|
const double tmp28_1 = B_1_0*w42; |
|
|
const double tmp22_1 = B_1_0*w40; |
|
|
const double tmp16_1 = B_1_2*w29; |
|
|
const double tmp6_1 = tmp2_0*w35; |
|
|
const double tmp55_1 = B_1_3*w40; |
|
|
const double tmp50_1 = B_1_3*w42; |
|
|
const double tmp7_1 = tmp1_0*w29; |
|
|
const double tmp1_1 = tmp1_0*w32; |
|
|
const double tmp57_1 = B_0_3*w30; |
|
|
const double tmp18_1 = B_1_1*w32; |
|
|
const double tmp53_1 = B_1_0*w41; |
|
|
const double tmp61_1 = B_1_3*w36; |
|
|
const double tmp27_1 = B_0_3*w38; |
|
|
const double tmp64_1 = B_0_2*w30; |
|
|
const double tmp76_1 = B_0_1*w38; |
|
|
const double tmp39_1 = tmp2_0*w34; |
|
|
const double tmp62_1 = B_0_1*w31; |
|
|
const double tmp56_1 = B_0_0*w31; |
|
|
const double tmp49_1 = B_1_1*w36; |
|
|
const double tmp2_1 = B_0_2*w31; |
|
|
const double tmp23_1 = B_0_2*w33; |
|
|
const double tmp38_1 = B_1_1*w43; |
|
|
const double tmp74_1 = B_1_2*w41; |
|
|
const double tmp43_1 = B_1_1*w41; |
|
|
const double tmp58_1 = B_0_2*w28; |
|
|
const double tmp67_1 = B_0_0*w33; |
|
|
const double tmp33_1 = tmp0_0*w39; |
|
|
const double tmp4_1 = B_0_0*w28; |
|
|
const double tmp20_1 = B_0_0*w30; |
|
|
const double tmp13_1 = B_0_2*w38; |
|
|
const double tmp65_1 = B_1_2*w43; |
|
|
const double tmp0_1 = tmp0_0*w29; |
|
|
const double tmp41_1 = tmp3_0*w33; |
|
|
const double tmp73_1 = B_0_2*w37; |
|
|
const double tmp69_1 = B_0_0*w38; |
|
|
const double tmp48_1 = B_1_2*w39; |
|
|
const double tmp59_1 = B_0_1*w33; |
|
|
const double tmp17_1 = B_1_3*w41; |
|
|
const double tmp5_1 = B_0_3*w33; |
|
|
const double tmp3_1 = B_0_1*w30; |
|
|
const double tmp21_1 = B_0_1*w28; |
|
|
const double tmp42_1 = B_1_0*w29; |
|
|
const double tmp54_1 = B_1_2*w32; |
|
|
const double tmp60_1 = B_1_0*w39; |
|
|
const double tmp32_1 = tmp1_0*w36; |
|
|
const double tmp10_1 = B_0_1*w37; |
|
|
const double tmp14_1 = B_0_0*w35; |
|
|
const double tmp29_1 = B_0_1*w35; |
|
|
const double tmp26_1 = B_1_2*w36; |
|
|
const double tmp30_1 = B_1_3*w43; |
|
|
const double tmp70_1 = B_0_2*w35; |
|
|
const double tmp34_1 = B_1_3*w39; |
|
|
const double tmp51_1 = B_1_0*w43; |
|
|
const double tmp31_1 = B_0_2*w34; |
|
|
const double tmp45_1 = tmp3_0*w28; |
|
|
const double tmp11_1 = tmp1_0*w39; |
|
|
const double tmp52_1 = B_1_1*w29; |
|
|
const double tmp44_1 = B_1_3*w32; |
|
|
const double tmp25_1 = B_1_1*w39; |
|
|
const double tmp47_1 = tmp2_0*w33; |
|
|
const double tmp72_1 = B_1_3*w29; |
|
|
const double tmp40_1 = tmp2_0*w28; |
|
|
const double tmp46_1 = B_1_2*w40; |
|
|
const double tmp36_1 = B_1_2*w42; |
|
|
const double tmp24_1 = B_0_0*w37; |
|
|
const double tmp77_1 = B_0_3*w35; |
|
|
const double tmp68_1 = B_0_3*w37; |
|
|
const double tmp78_1 = B_0_0*w34; |
|
|
const double tmp12_1 = tmp0_0*w36; |
|
|
const double tmp75_1 = B_1_0*w32; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
} |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; |
|
|
const double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; |
|
|
const double tmp6_1 = B_1*w50; |
|
|
const double tmp1_1 = B_1*w45; |
|
|
const double tmp5_1 = B_1*w49; |
|
|
const double tmp4_1 = B_1*w48; |
|
|
const double tmp0_1 = B_0*w44; |
|
|
const double tmp2_1 = B_0*w46; |
|
|
const double tmp7_1 = B_0*w51; |
|
|
const double tmp3_1 = B_0*w47; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp3_1 + tmp6_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process C // |
|
|
/////////////// |
|
|
if (!C.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
|
|
if (C.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)]; |
|
|
const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)]; |
|
|
const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)]; |
|
|
const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)]; |
|
|
const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)]; |
|
|
const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)]; |
|
|
const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)]; |
|
|
const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)]; |
|
|
const double tmp2_0 = C_0_1 + C_0_3; |
|
|
const double tmp1_0 = C_1_2 + C_1_3; |
|
|
const double tmp3_0 = C_0_0 + C_0_2; |
|
|
const double tmp0_0 = C_1_0 + C_1_1; |
|
|
const double tmp64_1 = C_0_2*w30; |
|
|
const double tmp14_1 = C_0_2*w28; |
|
|
const double tmp19_1 = C_0_3*w31; |
|
|
const double tmp22_1 = C_1_0*w40; |
|
|
const double tmp37_1 = tmp3_0*w35; |
|
|
const double tmp29_1 = C_0_1*w35; |
|
|
const double tmp73_1 = C_0_2*w37; |
|
|
const double tmp74_1 = C_1_2*w41; |
|
|
const double tmp52_1 = C_1_3*w39; |
|
|
const double tmp25_1 = C_1_1*w39; |
|
|
const double tmp62_1 = C_0_1*w31; |
|
|
const double tmp79_1 = C_1_1*w40; |
|
|
const double tmp43_1 = C_1_1*w36; |
|
|
const double tmp27_1 = C_0_3*w38; |
|
|
const double tmp28_1 = C_1_0*w42; |
|
|
const double tmp63_1 = C_1_1*w42; |
|
|
const double tmp59_1 = C_0_3*w34; |
|
|
const double tmp72_1 = C_1_3*w29; |
|
|
const double tmp40_1 = tmp2_0*w35; |
|
|
const double tmp13_1 = C_0_3*w30; |
|
|
const double tmp51_1 = C_1_2*w40; |
|
|
const double tmp54_1 = C_1_2*w42; |
|
|
const double tmp12_1 = C_0_0*w31; |
|
|
const double tmp2_1 = tmp1_0*w32; |
|
|
const double tmp68_1 = C_0_2*w31; |
|
|
const double tmp75_1 = C_1_0*w32; |
|
|
const double tmp49_1 = C_1_1*w41; |
|
|
const double tmp4_1 = C_0_2*w35; |
|
|
const double tmp66_1 = C_0_3*w28; |
|
|
const double tmp56_1 = C_0_1*w37; |
|
|
const double tmp5_1 = C_0_1*w34; |
|
|
const double tmp38_1 = tmp2_0*w34; |
|
|
const double tmp76_1 = C_0_1*w38; |
|
|
const double tmp21_1 = C_0_1*w28; |
|
|
const double tmp69_1 = C_0_1*w30; |
|
|
const double tmp53_1 = C_1_0*w36; |
|
|
const double tmp42_1 = C_1_2*w39; |
|
|
const double tmp32_1 = tmp1_0*w29; |
|
|
const double tmp45_1 = C_1_0*w43; |
|
|
const double tmp33_1 = tmp0_0*w32; |
|
|
const double tmp35_1 = C_1_0*w41; |
|
|
const double tmp26_1 = C_1_2*w36; |
|
|
const double tmp67_1 = C_0_0*w33; |
|
|
const double tmp31_1 = C_0_2*w34; |
|
|
const double tmp20_1 = C_0_0*w30; |
|
|
const double tmp70_1 = C_0_0*w28; |
|
|
const double tmp8_1 = tmp0_0*w39; |
|
|
const double tmp30_1 = C_1_3*w43; |
|
|
const double tmp0_1 = tmp0_0*w29; |
|
|
const double tmp17_1 = C_1_3*w41; |
|
|
const double tmp58_1 = C_0_0*w35; |
|
|
const double tmp9_1 = tmp3_0*w33; |
|
|
const double tmp61_1 = C_1_3*w36; |
|
|
const double tmp41_1 = tmp3_0*w34; |
|
|
const double tmp50_1 = C_1_3*w32; |
|
|
const double tmp18_1 = C_1_1*w32; |
|
|
const double tmp6_1 = tmp1_0*w36; |
|
|
const double tmp3_1 = C_0_0*w38; |
|
|
const double tmp34_1 = C_1_1*w29; |
|
|
const double tmp77_1 = C_0_3*w35; |
|
|
const double tmp65_1 = C_1_2*w43; |
|
|
const double tmp71_1 = C_0_3*w33; |
|
|
const double tmp55_1 = C_1_1*w43; |
|
|
const double tmp46_1 = tmp3_0*w28; |
|
|
const double tmp24_1 = C_0_0*w37; |
|
|
const double tmp10_1 = tmp1_0*w39; |
|
|
const double tmp48_1 = C_1_0*w29; |
|
|
const double tmp15_1 = C_0_1*w33; |
|
|
const double tmp36_1 = C_1_2*w32; |
|
|
const double tmp60_1 = C_1_0*w39; |
|
|
const double tmp47_1 = tmp2_0*w33; |
|
|
const double tmp16_1 = C_1_2*w29; |
|
|
const double tmp1_1 = C_0_3*w37; |
|
|
const double tmp7_1 = tmp2_0*w28; |
|
|
const double tmp39_1 = C_1_3*w40; |
|
|
const double tmp44_1 = C_1_3*w42; |
|
|
const double tmp57_1 = C_0_2*w38; |
|
|
const double tmp78_1 = C_0_0*w34; |
|
|
const double tmp11_1 = tmp0_0*w36; |
|
|
const double tmp23_1 = C_0_2*w33; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
|
|
} |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp1_1 = C_1*w45; |
|
|
const double tmp3_1 = C_0*w51; |
|
|
const double tmp4_1 = C_0*w44; |
|
|
const double tmp7_1 = C_0*w46; |
|
|
const double tmp5_1 = C_1*w49; |
|
|
const double tmp2_1 = C_1*w48; |
|
|
const double tmp0_1 = C_0*w47; |
|
|
const double tmp6_1 = C_1*w50; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp3_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp3_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp4_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp5_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process D // |
|
|
/////////////// |
|
|
if (!D.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
|
|
if (D.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double D_0 = D_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double D_1 = D_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double D_2 = D_p[INDEX3(k, m, 2, numEq, numComp)]; |
|
|
const double D_3 = D_p[INDEX3(k, m, 3, numEq, numComp)]; |
|
|
const double tmp4_0 = D_1 + D_3; |
|
|
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
|
|
const double tmp5_0 = D_0 + D_2; |
|
|
const double tmp0_0 = D_0 + D_1; |
|
|
const double tmp6_0 = D_0 + D_3; |
|
|
const double tmp1_0 = D_2 + D_3; |
|
|
const double tmp3_0 = D_1 + D_2; |
|
|
const double tmp16_1 = D_1*w56; |
|
|
const double tmp14_1 = tmp6_0*w54; |
|
|
const double tmp8_1 = D_3*w55; |
|
|
const double tmp2_1 = tmp2_0*w54; |
|
|
const double tmp12_1 = tmp5_0*w52; |
|
|
const double tmp4_1 = tmp0_0*w53; |
|
|
const double tmp3_1 = tmp1_0*w52; |
|
|
const double tmp13_1 = tmp4_0*w53; |
|
|
const double tmp10_1 = tmp4_0*w52; |
|
|
const double tmp1_1 = tmp1_0*w53; |
|
|
const double tmp7_1 = D_3*w56; |
|
|
const double tmp0_1 = tmp0_0*w52; |
|
|
const double tmp11_1 = tmp5_0*w53; |
|
|
const double tmp9_1 = D_0*w56; |
|
|
const double tmp5_1 = tmp3_0*w54; |
|
|
const double tmp18_1 = D_2*w56; |
|
|
const double tmp17_1 = D_1*w55; |
|
|
const double tmp6_1 = D_0*w55; |
|
|
const double tmp15_1 = D_2*w55; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp12_1 + tmp13_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
|
|
} |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double D_0 = D_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp2_1 = D_0*w59; |
|
|
const double tmp1_1 = D_0*w58; |
|
|
const double tmp0_1 = D_0*w57; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp2_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process X // |
|
|
/////////////// |
|
|
if (!X.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
|
|
if (X.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double X_0_0 = X_p[INDEX3(k, 0, 0, numEq, 2)]; |
|
|
const double X_1_0 = X_p[INDEX3(k, 1, 0, numEq, 2)]; |
|
|
const double X_0_1 = X_p[INDEX3(k, 0, 1, numEq, 2)]; |
|
|
const double X_1_1 = X_p[INDEX3(k, 1, 1, numEq, 2)]; |
|
|
const double X_0_2 = X_p[INDEX3(k, 0, 2, numEq, 2)]; |
|
|
const double X_1_2 = X_p[INDEX3(k, 1, 2, numEq, 2)]; |
|
|
const double X_0_3 = X_p[INDEX3(k, 0, 3, numEq, 2)]; |
|
|
const double X_1_3 = X_p[INDEX3(k, 1, 3, numEq, 2)]; |
|
|
const double tmp1_0 = X_1_1 + X_1_3; |
|
|
const double tmp3_0 = X_0_0 + X_0_1; |
|
|
const double tmp2_0 = X_1_0 + X_1_2; |
|
|
const double tmp0_0 = X_0_2 + X_0_3; |
|
|
const double tmp8_1 = tmp2_0*w66; |
|
|
const double tmp5_1 = tmp3_0*w64; |
|
|
const double tmp14_1 = tmp0_0*w64; |
|
|
const double tmp3_1 = tmp3_0*w60; |
|
|
const double tmp9_1 = tmp3_0*w63; |
|
|
const double tmp13_1 = tmp3_0*w65; |
|
|
const double tmp12_1 = tmp1_0*w66; |
|
|
const double tmp10_1 = tmp0_0*w60; |
|
|
const double tmp2_1 = tmp2_0*w61; |
|
|
const double tmp6_1 = tmp2_0*w62; |
|
|
const double tmp4_1 = tmp0_0*w65; |
|
|
const double tmp11_1 = tmp1_0*w67; |
|
|
const double tmp1_1 = tmp1_0*w62; |
|
|
const double tmp7_1 = tmp1_0*w61; |
|
|
const double tmp0_1 = tmp0_0*w63; |
|
|
const double tmp15_1 = tmp2_0*w67; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double X_0 = X_p[INDEX2(k, 0, numEq)]; |
|
|
const double X_1 = X_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp0_1 = X_1*w69; |
|
|
const double tmp1_1 = X_0*w68; |
|
|
const double tmp2_1 = X_0*w70; |
|
|
const double tmp3_1 = X_1*w71; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1 + tmp2_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp1_1 + tmp3_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process Y // |
|
|
/////////////// |
|
|
if (!Y.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
|
|
if (Y.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double Y_0 = Y_p[INDEX2(k, 0, numEq)]; |
|
|
const double Y_1 = Y_p[INDEX2(k, 1, numEq)]; |
|
|
const double Y_2 = Y_p[INDEX2(k, 2, numEq)]; |
|
|
const double Y_3 = Y_p[INDEX2(k, 3, numEq)]; |
|
|
const double tmp0_0 = Y_1 + Y_2; |
|
|
const double tmp1_0 = Y_0 + Y_3; |
|
|
const double tmp0_1 = Y_0*w72; |
|
|
const double tmp1_1 = tmp0_0*w73; |
|
|
const double tmp2_1 = Y_3*w74; |
|
|
const double tmp3_1 = Y_1*w72; |
|
|
const double tmp4_1 = tmp1_0*w73; |
|
|
const double tmp5_1 = Y_2*w74; |
|
|
const double tmp6_1 = Y_2*w72; |
|
|
const double tmp7_1 = Y_1*w74; |
|
|
const double tmp8_1 = Y_3*w72; |
|
|
const double tmp9_1 = Y_0*w74; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp3_1 + tmp4_1 + tmp5_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp4_1 + tmp6_1 + tmp7_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp1_1 + tmp8_1 + tmp9_1; |
|
|
} |
|
|
} else { // constant data |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double tmp0_1 = Y_p[k]*w75; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
2086 |
|
|
2087 |
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
All local calculation is done on an array called `src`, with |
2088 |
const index_t firstNode=m_NN[0]*k1+k0; |
dimensions = ext[0] * ext[1]. |
2089 |
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
Where ext[i]= internal[i]+2*radius. |
|
firstNode, numEq, numComp); |
|
|
} // end k0 loop |
|
|
} // end k1 loop |
|
|
} // end of colouring |
|
|
} // end of parallel region |
|
|
} |
|
2090 |
|
|
2091 |
//protected |
Now for MPI there is overlap to deal with. We need to share both the overlapping |
2092 |
void Rectangle::assemblePDESystemReduced(Paso_SystemMatrix* mat, |
values themselves but also the external region. |
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y) const |
|
|
{ |
|
|
dim_t numEq, numComp; |
|
|
if (!mat) |
|
|
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); |
|
|
else { |
|
|
numEq=mat->logical_row_block_size; |
|
|
numComp=mat->logical_col_block_size; |
|
|
} |
|
|
|
|
|
const double w0 = -.25*m_dx[1]/m_dx[0]; |
|
|
const double w1 = .25; |
|
|
const double w2 = -.25; |
|
|
const double w3 = .25*m_dx[0]/m_dx[1]; |
|
|
const double w4 = -.25*m_dx[0]/m_dx[1]; |
|
|
const double w5 = .25*m_dx[1]/m_dx[0]; |
|
|
const double w6 = -.125*m_dx[1]; |
|
|
const double w7 = -.125*m_dx[0]; |
|
|
const double w8 = .125*m_dx[1]; |
|
|
const double w9 = .125*m_dx[0]; |
|
|
const double w10 = .0625*m_dx[0]*m_dx[1]; |
|
|
const double w11 = -.5*m_dx[1]; |
|
|
const double w12 = -.5*m_dx[0]; |
|
|
const double w13 = .5*m_dx[1]; |
|
|
const double w14 = .5*m_dx[0]; |
|
|
const double w15 = .25*m_dx[0]*m_dx[1]; |
|
2093 |
|
|
2094 |
rhs.requireWrite(); |
In a hypothetical 1-D case: |
|
#pragma omp parallel |
|
|
{ |
|
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
for (index_t k0=0; k0<m_NE[0]; ++k0) { |
|
|
bool add_EM_S=false; |
|
|
bool add_EM_F=false; |
|
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
|
|
vector<double> EM_F(4*numEq, 0); |
|
|
const index_t e = k0 + m_NE[0]*k1; |
|
|
/////////////// |
|
|
// process A // |
|
|
/////////////// |
|
|
if (!A.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; |
|
|
const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; |
|
|
const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; |
|
|
const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; |
|
|
const double tmp0_0 = A_01 + A_10; |
|
|
const double tmp0_1 = A_11*w3; |
|
|
const double tmp1_1 = A_00*w0; |
|
|
const double tmp2_1 = A_01*w1; |
|
|
const double tmp3_1 = A_10*w2; |
|
|
const double tmp4_1 = tmp0_0*w1; |
|
|
const double tmp5_1 = A_11*w4; |
|
|
const double tmp6_1 = A_00*w5; |
|
|
const double tmp7_1 = tmp0_0*w2; |
|
|
const double tmp8_1 = A_10*w1; |
|
|
const double tmp9_1 = A_01*w2; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp4_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp4_1 + tmp5_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp5_1 + tmp7_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1 + tmp6_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process B // |
|
|
/////////////// |
|
|
if (!B.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; |
|
|
const double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; |
|
|
const double tmp0_1 = B_0*w6; |
|
|
const double tmp1_1 = B_1*w7; |
|
|
const double tmp2_1 = B_0*w8; |
|
|
const double tmp3_1 = B_1*w9; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process C // |
|
|
/////////////// |
|
|
if (!C.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp0_1 = C_0*w8; |
|
|
const double tmp1_1 = C_1*w7; |
|
|
const double tmp2_1 = C_1*w9; |
|
|
const double tmp3_1 = C_0*w6; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process D // |
|
|
/////////////// |
|
|
if (!D.isEmpty()) { |
|
|
add_EM_S=true; |
|
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double tmp0_1 = D_p[INDEX2(k, m, numEq)]*w10; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process X // |
|
|
/////////////// |
|
|
if (!X.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double X_0 = X_p[INDEX2(k, 0, numEq)]; |
|
|
const double X_1 = X_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp0_1 = X_0*w11; |
|
|
const double tmp1_1 = X_1*w12; |
|
|
const double tmp2_1 = X_0*w13; |
|
|
const double tmp3_1 = X_1*w14; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp1_1 + tmp2_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1 + tmp3_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process Y // |
|
|
/////////////// |
|
|
if (!Y.isEmpty()) { |
|
|
add_EM_F=true; |
|
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double tmp0_1 = Y_p[k]*w15; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
2095 |
|
|
|
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
|
|
const index_t firstNode=m_NN[0]*k1+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} // end k0 loop |
|
|
} // end k1 loop |
|
|
} // end of colouring |
|
|
} // end of parallel region |
|
|
} |
|
2096 |
|
|
2097 |
//protected |
1234567 |
2098 |
void Rectangle::assemblePDEBoundarySingle(Paso_SystemMatrix* mat, |
would be split into two ranks thus: |
2099 |
escript::Data& rhs, const escript::Data& d, const escript::Data& y) const |
123(4) (4)567 [4 being a shared element] |
|
{ |
|
|
const double w0 = 0.31100423396407310779*m_dx[1]; |
|
|
const double w1 = 0.022329099369260225539*m_dx[1]; |
|
|
const double w10 = 0.022329099369260225539*m_dx[0]; |
|
|
const double w11 = 0.16666666666666666667*m_dx[0]; |
|
|
const double w12 = 0.33333333333333333333*m_dx[0]; |
|
|
const double w13 = 0.39433756729740644113*m_dx[0]; |
|
|
const double w14 = 0.10566243270259355887*m_dx[0]; |
|
|
const double w15 = 0.5*m_dx[0]; |
|
|
const double w2 = 0.083333333333333333333*m_dx[1]; |
|
|
const double w3 = 0.33333333333333333333*m_dx[1]; |
|
|
const double w4 = 0.16666666666666666667*m_dx[1]; |
|
|
const double w5 = 0.39433756729740644113*m_dx[1]; |
|
|
const double w6 = 0.10566243270259355887*m_dx[1]; |
|
|
const double w7 = 0.5*m_dx[1]; |
|
|
const double w8 = 0.083333333333333333333*m_dx[0]; |
|
|
const double w9 = 0.31100423396407310779*m_dx[0]; |
|
|
const bool add_EM_S=!d.isEmpty(); |
|
|
const bool add_EM_F=!y.isEmpty(); |
|
|
rhs.requireWrite(); |
|
|
#pragma omp parallel |
|
|
{ |
|
|
if (m_faceOffset[0] > -1) { |
|
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
const double d_0 = d_p[0]; |
|
|
const double d_1 = d_p[1]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp1_1 = d_1*w1; |
|
|
const double tmp4_1 = d_0*w1; |
|
|
const double tmp0_1 = d_0*w0; |
|
|
const double tmp3_1 = d_1*w0; |
|
|
const double tmp2_1 = tmp0_0*w2; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp3_1 + tmp4_1; |
|
|
} else { /* constant data */ |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp1_1 = d_0*w4; |
|
|
const double tmp0_1 = d_0*w3; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
const double y_0 = y_p[0]; |
|
|
const double y_1 = y_p[1]; |
|
|
const double tmp3_1 = w5*y_1; |
|
|
const double tmp2_1 = w6*y_0; |
|
|
const double tmp0_1 = w6*y_1; |
|
|
const double tmp1_1 = w5*y_0; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1; |
|
|
EM_F[2]+=tmp2_1 + tmp3_1; |
|
|
} else { /* constant data */ |
|
|
const double y_0 = y_p[0]; |
|
|
const double tmp0_1 = w7*y_0; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[2]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*k1; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2100 |
|
|
2101 |
if (m_faceOffset[1] > -1) { |
If the radius is 2. There will be padding elements on the outside: |
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = m_faceOffset[1]+k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
const double d_0 = d_p[0]; |
|
|
const double d_1 = d_p[1]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp4_1 = d_1*w1; |
|
|
const double tmp1_1 = d_0*w1; |
|
|
const double tmp3_1 = d_0*w0; |
|
|
const double tmp0_1 = d_1*w0; |
|
|
const double tmp2_1 = tmp0_0*w2; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp2_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp1_1; |
|
|
} else { /* constant data */ |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp1_1 = d_0*w4; |
|
|
const double tmp0_1 = d_0*w3; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
const double y_0 = y_p[0]; |
|
|
const double y_1 = y_p[1]; |
|
|
const double tmp3_1 = w5*y_1; |
|
|
const double tmp2_1 = w6*y_0; |
|
|
const double tmp0_1 = w6*y_1; |
|
|
const double tmp1_1 = w5*y_0; |
|
|
EM_F[1]+=tmp0_1 + tmp1_1; |
|
|
EM_F[3]+=tmp2_1 + tmp3_1; |
|
|
} else { /* constant data */ |
|
|
const double y_0 = y_p[0]; |
|
|
const double tmp0_1 = w7*y_0; |
|
|
EM_F[1]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(k1+1)-2; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2102 |
|
|
2103 |
if (m_faceOffset[2] > -1) { |
pp123(4) (4)567pp |
|
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = m_faceOffset[2]+k0; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
const double d_0 = d_p[0]; |
|
|
const double d_1 = d_p[1]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp4_1 = d_1*w9; |
|
|
const double tmp2_1 = d_0*w9; |
|
|
const double tmp0_1 = tmp0_0*w8; |
|
|
const double tmp1_1 = d_1*w10; |
|
|
const double tmp3_1 = d_0*w10; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp4_1; |
|
|
} else { /* constant data */ |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp0_1 = d_0*w11; |
|
|
const double tmp1_1 = d_0*w12; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp1_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
const double y_0 = y_p[0]; |
|
|
const double y_1 = y_p[1]; |
|
|
const double tmp2_1 = w13*y_1; |
|
|
const double tmp1_1 = w14*y_1; |
|
|
const double tmp3_1 = w14*y_0; |
|
|
const double tmp0_1 = w13*y_0; |
|
|
EM_F[0]+=tmp0_1 + tmp1_1; |
|
|
EM_F[1]+=tmp2_1 + tmp3_1; |
|
|
} else { /* constant data */ |
|
|
const double y_0 = y_p[0]; |
|
|
const double tmp0_1 = w15*y_0; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[1]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2104 |
|
|
2105 |
if (m_faceOffset[3] > -1) { |
To ensure that 4 can be correctly computed on both ranks, values from the other rank |
2106 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
need to be known. |
|
#pragma omp for |
|
|
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
|
|
const index_t e = m_faceOffset[3]+k0; |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
const double d_0 = d_p[0]; |
|
|
const double d_1 = d_p[1]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp2_1 = d_1*w9; |
|
|
const double tmp4_1 = d_0*w9; |
|
|
const double tmp0_1 = tmp0_0*w8; |
|
|
const double tmp3_1 = d_1*w10; |
|
|
const double tmp1_1 = d_0*w10; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp1_1 + tmp2_1; |
|
|
} else { /* constant data */ |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp0_1 = d_0*w11; |
|
|
const double tmp1_1 = d_0*w12; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp1_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp1_1; |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
const double y_0 = y_p[0]; |
|
|
const double y_1 = y_p[1]; |
|
|
const double tmp2_1 = w13*y_1; |
|
|
const double tmp1_1 = w14*y_1; |
|
|
const double tmp3_1 = w14*y_0; |
|
|
const double tmp0_1 = w13*y_0; |
|
|
EM_F[2]+=tmp0_1 + tmp1_1; |
|
|
EM_F[3]+=tmp2_1 + tmp3_1; |
|
|
} else { /* constant data */ |
|
|
const double y_0 = y_p[0]; |
|
|
const double tmp0_1 = w15*y_0; |
|
|
EM_F[2]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
|
} // end of parallel section |
|
|
} |
|
2107 |
|
|
2108 |
//protected |
pp123(4)56 23(4)567pp |
|
void Rectangle::assemblePDEBoundarySingleReduced(Paso_SystemMatrix* mat, |
|
|
escript::Data& rhs, const escript::Data& d, const escript::Data& y) const |
|
|
{ |
|
|
const double w0 = 0.25*m_dx[1]; |
|
|
const double w1 = 0.5*m_dx[1]; |
|
|
const double w2 = 0.25*m_dx[0]; |
|
|
const double w3 = 0.5*m_dx[0]; |
|
|
const bool add_EM_S=!d.isEmpty(); |
|
|
const bool add_EM_F=!y.isEmpty(); |
|
|
rhs.requireWrite(); |
|
|
#pragma omp parallel |
|
|
{ |
|
|
if (m_faceOffset[0] > -1) { |
|
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp0_1 = d_0*w0; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1; |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
const double tmp0_1 = w1*y_p[0]; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[2]+=tmp0_1; |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*k1; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2109 |
|
|
2110 |
if (m_faceOffset[1] > -1) { |
Now in our case, we wout set all the values 23456 on the left rank and send them to the |
2111 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
right hand rank. |
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = m_faceOffset[1]+k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
const double d_0 = d_p[0]; |
|
|
const double tmp0_1 = d_0*w0; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1; |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
const double tmp0_1 = w1*y_p[0]; |
|
|
EM_F[1]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(k1+1)-2; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2112 |
|
|
2113 |
if (m_faceOffset[2] > -1) { |
So the edges _may_ need to be shared at a distance `inset` from all boundaries. |
2114 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
inset=2*radius+1 |
2115 |
#pragma omp for |
This is to ensure that values at distance `radius` from the shared/overlapped element |
2116 |
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
that ripley has. |
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = m_faceOffset[2]+k0; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
const double tmp0_1 = d_p[0]*w2; |
|
|
EM_S[INDEX2(0,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(1,1,4)]+=tmp0_1; |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
const double tmp0_1 = w3*y_p[0]; |
|
|
EM_F[0]+=tmp0_1; |
|
|
EM_F[1]+=tmp0_1; |
|
|
} |
|
|
const index_t firstNode=k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2117 |
|
|
|
if (m_faceOffset[3] > -1) { |
|
|
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
|
|
vector<double> EM_S(4*4, 0); |
|
|
vector<double> EM_F(4, 0); |
|
|
const index_t e = m_faceOffset[3]+k0; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
const double tmp0_1 = d_p[0]*w2; |
|
|
EM_S[INDEX2(2,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
|
|
EM_S[INDEX2(3,3,4)]+=tmp0_1; |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
const double tmp0_1 = w3*y_p[0]; |
|
|
EM_F[2]+=tmp0_1; |
|
|
EM_F[3]+=tmp0_1; |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
|
} // end of parallel section |
|
|
} |
|
2118 |
|
|
2119 |
//protected |
*/ |
2120 |
void Rectangle::assemblePDEBoundarySystem(Paso_SystemMatrix* mat, |
escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape, |
2121 |
escript::Data& rhs, const escript::Data& d, const escript::Data& y) const |
long seed, const boost::python::tuple& filter) const |
2122 |
{ |
{ |
2123 |
dim_t numEq, numComp; |
if (m_numDim!=2) |
2124 |
if (!mat) { |
{ |
2125 |
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); |
throw RipleyException("Only 2D supported at this time."); |
|
} else { |
|
|
numEq=mat->logical_row_block_size; |
|
|
numComp=mat->logical_col_block_size; |
|
2126 |
} |
} |
2127 |
const double w0 = 0.31100423396407310779*m_dx[1]; |
|
2128 |
const double w1 = 0.022329099369260225539*m_dx[1]; |
unsigned int radius=0; // these are only used by gaussian |
2129 |
const double w10 = 0.022329099369260225539*m_dx[0]; |
double sigma=0.5; |
2130 |
const double w11 = 0.16666666666666666667*m_dx[0]; |
|
2131 |
const double w12 = 0.33333333333333333333*m_dx[0]; |
unsigned int numvals=escript::DataTypes::noValues(shape); |
2132 |
const double w13 = 0.39433756729740644113*m_dx[0]; |
|
2133 |
const double w14 = 0.10566243270259355887*m_dx[0]; |
|
2134 |
const double w15 = 0.5*m_dx[0]; |
if (len(filter)==0) |
|
const double w2 = 0.083333333333333333333*m_dx[1]; |
|
|
const double w3 = 0.33333333333333333333*m_dx[1]; |
|
|
const double w4 = 0.16666666666666666667*m_dx[1]; |
|
|
const double w5 = 0.39433756729740644113*m_dx[1]; |
|
|
const double w6 = 0.10566243270259355887*m_dx[1]; |
|
|
const double w7 = 0.5*m_dx[1]; |
|
|
const double w8 = 0.083333333333333333333*m_dx[0]; |
|
|
const double w9 = 0.31100423396407310779*m_dx[0]; |
|
|
const bool add_EM_S=!d.isEmpty(); |
|
|
const bool add_EM_F=!y.isEmpty(); |
|
|
rhs.requireWrite(); |
|
|
#pragma omp parallel |
|
2135 |
{ |
{ |
2136 |
if (m_faceOffset[0] > -1) { |
// nothing special required here yet |
2137 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
} |
2138 |
#pragma omp for |
else if (len(filter)==3) |
2139 |
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
{ |
2140 |
vector<double> EM_S(4*4*numEq*numComp, 0); |
boost::python::extract<string> ex(filter[0]); |
2141 |
vector<double> EM_F(4*numEq, 0); |
if (!ex.check() || (ex()!="gaussian")) |
2142 |
const index_t e = k1; |
{ |
2143 |
/////////////// |
throw RipleyException("Unsupported random filter"); |
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double d_1 = d_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp0_1 = d_0*w0; |
|
|
const double tmp1_1 = d_1*w1; |
|
|
const double tmp2_1 = tmp0_0*w2; |
|
|
const double tmp3_1 = d_1*w0; |
|
|
const double tmp4_1 = d_0*w1; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
} |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp0_1 = d_0*w3; |
|
|
const double tmp1_1 = d_0*w4; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[INDEX2(k, 0, numEq)]; |
|
|
const double y_1 = y_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp3_1 = w5*y_1; |
|
|
const double tmp2_1 = w6*y_0; |
|
|
const double tmp0_1 = w6*y_1; |
|
|
const double tmp1_1 = w5*y_0; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[k]; |
|
|
const double tmp0_1 = w7*y_0; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*k1; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
2144 |
} |
} |
2145 |
|
boost::python::extract<unsigned int> ex1(filter[1]); |
2146 |
if (m_faceOffset[1] > -1) { |
if (!ex1.check()) |
2147 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
{ |
2148 |
#pragma omp for |
throw RipleyException("Radius of gaussian filter must be a positive integer."); |
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
|
|
vector<double> EM_F(4*numEq, 0); |
|
|
const index_t e = m_faceOffset[1]+k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double d_1 = d_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp4_1 = d_1*w1; |
|
|
const double tmp1_1 = d_0*w1; |
|
|
const double tmp3_1 = d_0*w0; |
|
|
const double tmp0_1 = d_1*w0; |
|
|
const double tmp2_1 = tmp0_0*w2; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
|
|
} |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp1_1 = d_0*w4; |
|
|
const double tmp0_1 = d_0*w3; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[INDEX2(k, 0, numEq)]; |
|
|
const double y_1 = y_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp3_1 = w5*y_1; |
|
|
const double tmp2_1 = w6*y_0; |
|
|
const double tmp0_1 = w6*y_1; |
|
|
const double tmp1_1 = w5*y_0; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[k]; |
|
|
const double tmp0_1 = w7*y_0; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(k1+1)-2; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
2149 |
} |
} |
2150 |
|
radius=ex1(); |
2151 |
|
sigma=0.5; |
2152 |
|
boost::python::extract<double> ex2(filter[2]); |
2153 |
|
if (!ex2.check() || (sigma=ex2())<=0) |
2154 |
|
{ |
2155 |
|
throw RipleyException("Sigma must be a postive floating point number."); |
2156 |
|
} |
2157 |
|
} |
2158 |
|
else |
2159 |
|
{ |
2160 |
|
throw RipleyException("Unsupported random filter for Rectangle."); |
2161 |
|
} |
2162 |
|
|
2163 |
|
|
2164 |
|
|
2165 |
|
size_t internal[2]; |
2166 |
|
internal[0]=m_NE[0]+1; // number of points in the internal region |
2167 |
|
internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of |
2168 |
|
size_t ext[2]; |
2169 |
|
ext[0]=internal[0]+2*radius; // includes points we need as input |
2170 |
|
ext[1]=internal[1]+2*radius; // for smoothing |
2171 |
|
|
2172 |
|
// now we check to see if the radius is acceptable |
2173 |
|
// That is, would not cross multiple ranks in MPI |
2174 |
|
|
2175 |
if (m_faceOffset[2] > -1) { |
if (2*radius>=internal[0]-4) |
2176 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
{ |
2177 |
#pragma omp for |
throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank"); |
2178 |
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
} |
2179 |
vector<double> EM_S(4*4*numEq*numComp, 0); |
if (2*radius>=internal[1]-4) |
2180 |
vector<double> EM_F(4*numEq, 0); |
{ |
2181 |
const index_t e = m_faceOffset[2]+k0; |
throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank"); |
2182 |
/////////////// |
} |
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double d_1 = d_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp4_1 = d_1*w9; |
|
|
const double tmp2_1 = d_0*w9; |
|
|
const double tmp0_1 = tmp0_0*w8; |
|
|
const double tmp1_1 = d_1*w10; |
|
|
const double tmp3_1 = d_0*w10; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
} |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp0_1 = d_0*w11; |
|
|
const double tmp1_1 = d_0*w12; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp1_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[INDEX2(k, 0, numEq)]; |
|
|
const double y_1 = y_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp2_1 = w13*y_1; |
|
|
const double tmp1_1 = w14*y_1; |
|
|
const double tmp3_1 = w14*y_0; |
|
|
const double tmp0_1 = w13*y_0; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[k]; |
|
|
const double tmp0_1 = w15*y_0; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
const index_t firstNode=k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2183 |
|
|
2184 |
if (m_faceOffset[3] > -1) { |
double* src=new double[ext[0]*ext[1]*numvals]; |
2185 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals); |
2186 |
#pragma omp for |
|
|
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
|
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
|
|
vector<double> EM_F(4*numEq, 0); |
|
|
const index_t e = m_faceOffset[3]+k0; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
if (d.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX3(k, m, 0, numEq, numComp)]; |
|
|
const double d_1 = d_p[INDEX3(k, m, 1, numEq, numComp)]; |
|
|
const double tmp0_0 = d_0 + d_1; |
|
|
const double tmp2_1 = d_1*w9; |
|
|
const double tmp4_1 = d_0*w9; |
|
|
const double tmp0_1 = tmp0_0*w8; |
|
|
const double tmp3_1 = d_1*w10; |
|
|
const double tmp1_1 = d_0*w10; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
|
|
} |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp0_1 = d_0*w11; |
|
|
const double tmp1_1 = d_0*w12; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp1_1; |
|
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp1_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
if (y.actsExpanded()) { |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[INDEX2(k, 0, numEq)]; |
|
|
const double y_1 = y_p[INDEX2(k, 1, numEq)]; |
|
|
const double tmp2_1 = w13*y_1; |
|
|
const double tmp1_1 = w14*y_1; |
|
|
const double tmp3_1 = w14*y_0; |
|
|
const double tmp0_1 = w13*y_0; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1 + tmp1_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; |
|
|
} |
|
|
} else { /* constant data */ |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double y_0 = y_p[k]; |
|
|
const double tmp0_1 = w15*y_0; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
|
} // end of parallel section |
|
|
} |
|
2187 |
|
|
|
//protected |
|
|
void Rectangle::assemblePDEBoundarySystemReduced(Paso_SystemMatrix* mat, |
|
|
escript::Data& rhs, const escript::Data& d, const escript::Data& y) const |
|
|
{ |
|
|
dim_t numEq, numComp; |
|
|
if (!mat) |
|
|
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); |
|
|
else { |
|
|
numEq=mat->logical_row_block_size; |
|
|
numComp=mat->logical_col_block_size; |
|
|
} |
|
|
const double w0 = 0.25*m_dx[1]; |
|
|
const double w1 = 0.5*m_dx[1]; |
|
|
const double w2 = 0.25*m_dx[0]; |
|
|
const double w3 = 0.5*m_dx[0]; |
|
|
const bool add_EM_S=!d.isEmpty(); |
|
|
const bool add_EM_F=!y.isEmpty(); |
|
|
rhs.requireWrite(); |
|
|
#pragma omp parallel |
|
|
{ |
|
|
if (m_faceOffset[0] > -1) { |
|
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
|
|
#pragma omp for |
|
|
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
|
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
|
|
vector<double> EM_F(4*numEq, 0); |
|
|
const index_t e = k1; |
|
|
/////////////// |
|
|
// process d // |
|
|
/////////////// |
|
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp0_1 = d_0*w0; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double tmp0_1 = w1*y_p[k]; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*k1; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2188 |
|
|
2189 |
if (m_faceOffset[1] > -1) { |
#ifdef ESYS_MPI |
2190 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
if ((internal[0]<5) || (internal[1]<5)) |
2191 |
#pragma omp for |
{ |
2192 |
for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) { |
// since the dimensions are equal for all ranks, this exception |
2193 |
vector<double> EM_S(4*4*numEq*numComp, 0); |
// will be thrown on all ranks |
2194 |
vector<double> EM_F(4*numEq, 0); |
throw RipleyException("Random Data in Ripley requries at least five elements per side per rank."); |
2195 |
const index_t e = m_faceOffset[1]+k1; |
} |
2196 |
/////////////// |
dim_t X=m_mpiInfo->rank%m_NX[0]; |
2197 |
// process d // |
dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0]; |
2198 |
/////////////// |
#endif |
2199 |
if (add_EM_S) { |
|
2200 |
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
/* |
2201 |
for (index_t k=0; k<numEq; k++) { |
// if we wanted to test a repeating pattern |
2202 |
for (index_t m=0; m<numComp; m++) { |
size_t basex=0; |
2203 |
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
size_t basey=0; |
2204 |
const double tmp0_1 = d_0*w0; |
#ifdef ESYS_MPI |
2205 |
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1; |
basex=X*m_gNE[0]/m_NX[0]; |
2206 |
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; |
basey=Y*m_gNE[1]/m_NX[1]; |
2207 |
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; |
#endif |
2208 |
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1; |
|
2209 |
} |
esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals); |
2210 |
} |
*/ |
2211 |
} |
|
2212 |
/////////////// |
|
2213 |
// process y // |
#ifdef ESYS_MPI |
2214 |
/////////////// |
|
2215 |
if (add_EM_F) { |
BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1); |
2216 |
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
size_t inset=2*radius+2; // Its +2 not +1 because a whole element is shared (and hence |
2217 |
for (index_t k=0; k<numEq; k++) { |
// there is an overlap of two points both of which need to have "radius" points on either side. |
2218 |
const double tmp0_1 = w1*y_p[k]; |
|
2219 |
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
size_t xmidlen=ext[0]-2*inset; // how wide is the x-dimension between the two insets |
2220 |
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
size_t ymidlen=ext[1]-2*inset; |
2221 |
} |
|
2222 |
} |
Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals); |
2223 |
const index_t firstNode=m_NN[0]*(k1+1)-2; |
|
2224 |
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
MPI_Request reqs[40]; // a non-tight upper bound on how many we need |
2225 |
firstNode, numEq, numComp); |
MPI_Status stats[40]; |
2226 |
} |
short rused=0; |
2227 |
} // end colouring |
|
2228 |
} |
messvec incoms; |
2229 |
|
messvec outcoms; |
2230 |
|
|
2231 |
|
grid.generateInNeighbours(X, Y, incoms); |
2232 |
|
grid.generateOutNeighbours(X, Y, outcoms); |
2233 |
|
|
2234 |
|
block.copyAllToBuffer(src); |
2235 |
|
|
2236 |
|
int comserr=0; |
2237 |
|
for (size_t i=0;i<incoms.size();++i) |
2238 |
|
{ |
2239 |
|
message& m=incoms[i]; |
2240 |
|
comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++)); |
2241 |
|
block.setUsed(m.destbuffid); |
2242 |
|
} |
2243 |
|
|
2244 |
if (m_faceOffset[2] > -1) { |
for (size_t i=0;i<outcoms.size();++i) |
2245 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
{ |
2246 |
#pragma omp for |
message& m=outcoms[i]; |
2247 |
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++)); |
2248 |
vector<double> EM_S(4*4*numEq*numComp, 0); |
} |
2249 |
vector<double> EM_F(4*numEq, 0); |
|
2250 |
const index_t e = m_faceOffset[2]+k0; |
if (!comserr) |
2251 |
/////////////// |
{ |
2252 |
// process d // |
comserr=MPI_Waitall(rused, reqs, stats); |
2253 |
/////////////// |
} |
|
if (add_EM_S) { |
|
|
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
for (index_t m=0; m<numComp; m++) { |
|
|
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
|
const double tmp0_1 = d_0*w2; |
|
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
|
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
} |
|
|
/////////////// |
|
|
// process y // |
|
|
/////////////// |
|
|
if (add_EM_F) { |
|
|
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
|
|
for (index_t k=0; k<numEq; k++) { |
|
|
const double tmp0_1 = w3*y_p[k]; |
|
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
|
} |
|
|
} // end colouring |
|
|
} |
|
2254 |
|
|
2255 |
if (m_faceOffset[3] > -1) { |
if (comserr) |
2256 |
for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring |
{ |
2257 |
#pragma omp for |
// Yes this is throwing an exception as a result of an MPI error. |
2258 |
for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) { |
// and no we don't inform the other ranks that we are doing this. |
2259 |
vector<double> EM_S(4*4*numEq*numComp, 0); |
// however, we have no reason to believe coms work at this point anyway |
2260 |
vector<double> EM_F(4*numEq, 0); |
throw RipleyException("Error in coms for randomFill"); |
2261 |
const index_t e = m_faceOffset[3]+k0; |
} |
2262 |
/////////////// |
|
2263 |
// process d // |
block.copyUsedFromBuffer(src); |
2264 |
/////////////// |
|
2265 |
if (add_EM_S) { |
#endif |
2266 |
const double* d_p=const_cast<escript::Data*>(&d)->getSampleDataRO(e); |
|
2267 |
for (index_t k=0; k<numEq; k++) { |
if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe |
2268 |
for (index_t m=0; m<numComp; m++) { |
{ |
2269 |
const double d_0 = d_p[INDEX2(k, m, numEq)]; |
|
2270 |
const double tmp0_1 = d_0*w2; |
escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode()); |
2271 |
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1; |
escript::Data resdat(0, shape, fs , true); |
2272 |
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
// don't need to check for exwrite because we just made it |
2273 |
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
escript::DataVector& dv=resdat.getExpandedVectorReference(); |
2274 |
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1; |
|
2275 |
} |
|
2276 |
} |
// now we need to copy values over |
2277 |
} |
for (size_t y=0;y<(internal[1]);++y) |
2278 |
/////////////// |
{ |
2279 |
// process y // |
for (size_t x=0;x<(internal[0]);++x) |
2280 |
/////////////// |
{ |
2281 |
if (add_EM_F) { |
for (unsigned int i=0;i<numvals;++i) |
2282 |
const double* y_p=const_cast<escript::Data*>(&y)->getSampleDataRO(e); |
{ |
2283 |
for (index_t k=0; k<numEq; k++) { |
dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals]; |
|
const double tmp0_1 = w3*y_p[k]; |
|
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
|
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
|
|
} |
|
|
} |
|
|
const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0; |
|
|
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, |
|
|
firstNode, numEq, numComp); |
|
2284 |
} |
} |
2285 |
} // end colouring |
} |
2286 |
} |
} |
2287 |
} // end of parallel section |
delete[] src; |
2288 |
|
return resdat; |
2289 |
|
} |
2290 |
|
else // filter enabled |
2291 |
|
{ |
2292 |
|
escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode()); |
2293 |
|
escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true); |
2294 |
|
// don't need to check for exwrite because we just made it |
2295 |
|
escript::DataVector& dv=resdat.getExpandedVectorReference(); |
2296 |
|
double* convolution=get2DGauss(radius, sigma); |
2297 |
|
for (size_t y=0;y<(internal[1]);++y) |
2298 |
|
{ |
2299 |
|
for (size_t x=0;x<(internal[0]);++x) |
2300 |
|
{ |
2301 |
|
dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]); |
2302 |
|
|
2303 |
|
} |
2304 |
|
} |
2305 |
|
delete[] convolution; |
2306 |
|
delete[] src; |
2307 |
|
return resdat; |
2308 |
|
} |
2309 |
|
} |
2310 |
|
|
2311 |
|
int Rectangle::findNode(const double *coords) const { |
2312 |
|
const int NOT_MINE = -1; |
2313 |
|
//is the found element even owned by this rank |
2314 |
|
// (inside owned or shared elements but will map to an owned element) |
2315 |
|
for (int dim = 0; dim < m_numDim; dim++) { |
2316 |
|
double min = m_origin[dim] + m_offset[dim]* m_dx[dim] |
2317 |
|
- m_dx[dim]/2.; //allows for point outside mapping onto node |
2318 |
|
double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim] |
2319 |
|
+ m_dx[dim]/2.; |
2320 |
|
if (min > coords[dim] || max < coords[dim]) { |
2321 |
|
return NOT_MINE; |
2322 |
|
} |
2323 |
|
} |
2324 |
|
// get distance from origin |
2325 |
|
double x = coords[0] - m_origin[0]; |
2326 |
|
double y = coords[1] - m_origin[1]; |
2327 |
|
// distance in elements |
2328 |
|
int ex = (int) floor(x / m_dx[0]); |
2329 |
|
int ey = (int) floor(y / m_dx[1]); |
2330 |
|
// set the min distance high enough to be outside the element plus a bit |
2331 |
|
int closest = NOT_MINE; |
2332 |
|
double minDist = 1; |
2333 |
|
for (int dim = 0; dim < m_numDim; dim++) { |
2334 |
|
minDist += m_dx[dim]*m_dx[dim]; |
2335 |
|
} |
2336 |
|
//find the closest node |
2337 |
|
for (int dx = 0; dx < 1; dx++) { |
2338 |
|
double xdist = (x - (ex + dx)*m_dx[0]); |
2339 |
|
for (int dy = 0; dy < 1; dy++) { |
2340 |
|
double ydist = (y - (ey + dy)*m_dx[1]); |
2341 |
|
double total = xdist*xdist + ydist*ydist; |
2342 |
|
if (total < minDist) { |
2343 |
|
closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1); |
2344 |
|
minDist = total; |
2345 |
|
} |
2346 |
|
} |
2347 |
|
} |
2348 |
|
//if this happens, we've let a dirac point slip through, which is awful |
2349 |
|
if (closest == NOT_MINE) { |
2350 |
|
throw RipleyException("Unable to map appropriate dirac point to a node," |
2351 |
|
" implementation problem in Rectangle::findNode()"); |
2352 |
|
} |
2353 |
|
return closest; |
2354 |
|
} |
2355 |
|
|
2356 |
|
void Rectangle::setAssembler(std::string type, std::map<std::string, |
2357 |
|
escript::Data> constants) { |
2358 |
|
if (type.compare("WaveAssembler") == 0) { |
2359 |
|
if (assembler_type != WAVE_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER) |
2360 |
|
throw RipleyException("Domain already using a different custom assembler"); |
2361 |
|
assembler_type = WAVE_ASSEMBLER; |
2362 |
|
delete assembler; |
2363 |
|
assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants); |
2364 |
|
} else if (type.compare("LameAssembler") == 0) { |
2365 |
|
if (assembler_type != LAME_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER) |
2366 |
|
throw RipleyException("Domain already using a different custom assembler"); |
2367 |
|
assembler_type = LAME_ASSEMBLER; |
2368 |
|
delete assembler; |
2369 |
|
assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN); |
2370 |
|
} else { //else ifs would go before this for other types |
2371 |
|
throw RipleyException("Ripley::Rectangle does not support the" |
2372 |
|
" requested assembler"); |
2373 |
|
} |
2374 |
} |
} |
2375 |
|
|
|
|
|
2376 |
} // end of namespace ripley |
} // end of namespace ripley |
2377 |
|
|