Parent Directory
|
Revision Log
|
Patch
branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp revision 3791 by caltinay, Wed Feb 1 05:10:22 2012 UTC | trunk/ripley/src/Rectangle.cpp revision 4769 by sshaw, Wed Mar 19 03:43:52 2014 UTC | |
---|---|---|
# | Line 1 | Line 1 |
1 | ||
2 | /******************************************************* | /***************************************************************************** |
3 | * | * |
4 | * Copyright (c) 2003-2012 by University of Queensland | * Copyright (c) 2003-2014 by University of Queensland |
5 | * Earth Systems Science Computational Center (ESSCC) | * http://www.uq.edu.au |
* http://www.uq.edu.au/esscc | ||
6 | * | * |
7 | * Primary Business: Queensland, Australia | * Primary Business: Queensland, Australia |
8 | * Licensed under the Open Software License version 3.0 | * Licensed under the Open Software License version 3.0 |
9 | * http://www.opensource.org/licenses/osl-3.0.php | * http://www.opensource.org/licenses/osl-3.0.php |
10 | * | * |
11 | *******************************************************/ | * Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 | * Development 2012-2013 by School of Earth Sciences | |
13 | * Development from 2014 by Centre for Geoscience Computing (GeoComp) | |
14 | * | |
15 | *****************************************************************************/ | |
16 | ||
17 | #include <algorithm> | |
18 | ||
19 | #include <ripley/Rectangle.h> | #include <ripley/Rectangle.h> |
extern "C" { | ||
20 | #include <paso/SystemMatrix.h> | #include <paso/SystemMatrix.h> |
21 | } | #include <esysUtils/esysFileWriter.h> |
22 | #include <ripley/DefaultAssembler2D.h> | |
23 | #include <ripley/WaveAssembler2D.h> | |
24 | #include <ripley/LameAssembler2D.h> | |
25 | #include <ripley/domainhelpers.h> | |
26 | #include <boost/scoped_array.hpp> | |
27 | #include "esysUtils/EsysRandom.h" | |
28 | #include "blocktools.h" | |
29 | ||
30 | #ifdef USE_NETCDF | |
31 | #include <netcdfcpp.h> | |
32 | #endif | |
33 | ||
34 | #if USE_SILO | #if USE_SILO |
35 | #include <silo.h> | #include <silo.h> |
# | Line 26 extern "C" { | Line 41 extern "C" { |
41 | #include <iomanip> | #include <iomanip> |
42 | ||
43 | using namespace std; | using namespace std; |
44 | using esysUtils::FileWriter; | |
45 | ||
46 | namespace ripley { | namespace ripley { |
47 | ||
48 | Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, | Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, |
49 | double y1, int d0, int d1) : | double y1, int d0, int d1, |
50 | RipleyDomain(2), | const std::vector<double>& points, |
51 | m_gNE0(n0), | const std::vector<int>& tags, |
52 | m_gNE1(n1), | const simap_t& tagnamestonums) : |
53 | m_x0(x0), | RipleyDomain(2) |
54 | m_y0(y0), | { |
55 | m_l0(x1-x0), | // ignore subdivision parameters for serial run |
56 | m_l1(y1-y0), | if (m_mpiInfo->size == 1) { |
57 | m_NX(d0), | d0=1; |
58 | m_NY(d1) | d1=1; |
59 | { | } |
60 | ||
61 | bool warn=false; | |
62 | std::vector<int> factors; | |
63 | int ranks = m_mpiInfo->size; | |
64 | int epr[2] = {n0,n1}; | |
65 | int d[2] = {d0,d1}; | |
66 | if (d0<=0 || d1<=0) { | |
67 | for (int i = 0; i < 2; i++) { | |
68 | if (d[i] < 1) { | |
69 | d[i] = 1; | |
70 | continue; | |
71 | } | |
72 | epr[i] = -1; // can no longer be max | |
73 | //remove | |
74 | if (ranks % d[i] != 0) { | |
75 | throw RipleyException("Invalid number of spatial subdivisions"); | |
76 | } | |
77 | ranks /= d[i]; | |
78 | } | |
79 | factorise(factors, ranks); | |
80 | if (factors.size() != 0) { | |
81 | warn = true; | |
82 | } | |
83 | } | |
84 | ||
85 | while (factors.size() > 0) { | |
86 | int i = epr[0] > epr[1] ? 0 : 1; | |
87 | int f = factors.back(); | |
88 | factors.pop_back(); | |
89 | d[i] *= f; | |
90 | epr[i] /= f; | |
91 | } | |
92 | d0 = d[0]; d1 = d[1]; | |
93 | ||
94 | // ensure number of subdivisions is valid and nodes can be distributed | // ensure number of subdivisions is valid and nodes can be distributed |
95 | // among number of ranks | // among number of ranks |
96 | if (m_NX*m_NY != m_mpiInfo->size) | if (d0*d1 != m_mpiInfo->size) |
97 | throw RipleyException("Invalid number of spatial subdivisions"); | throw RipleyException("Invalid number of spatial subdivisions"); |
98 | ||
99 | if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0) | if (warn) { |
100 | throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension"); | cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1=" |
101 | << d1 << "). This may not be optimal!" << endl; | |
102 | } | |
103 | ||
104 | double l0 = x1-x0; | |
105 | double l1 = y1-y0; | |
106 | m_dx[0] = l0/n0; | |
107 | m_dx[1] = l1/n1; | |
108 | ||
109 | if ((n0+1)%d0 > 0) { | |
110 | n0=(int)round((float)(n0+1)/d0+0.5)*d0-1; | |
111 | l0=m_dx[0]*n0; | |
112 | cout << "Warning: Adjusted number of elements and length. N0=" | |
113 | << n0 << ", l0=" << l0 << endl; | |
114 | } | |
115 | if ((n1+1)%d1 > 0) { | |
116 | n1=(int)round((float)(n1+1)/d1+0.5)*d1-1; | |
117 | l1=m_dx[1]*n1; | |
118 | cout << "Warning: Adjusted number of elements and length. N1=" | |
119 | << n1 << ", l1=" << l1 << endl; | |
120 | } | |
121 | ||
122 | if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) | if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2)) |
123 | throw RipleyException("Too few elements for the number of ranks"); | throw RipleyException("Too few elements for the number of ranks"); |
124 | ||
125 | m_gNE[0] = n0; | |
126 | m_gNE[1] = n1; | |
127 | m_origin[0] = x0; | |
128 | m_origin[1] = y0; | |
129 | m_length[0] = l0; | |
130 | m_length[1] = l1; | |
131 | m_NX[0] = d0; | |
132 | m_NX[1] = d1; | |
133 | ||
134 | // local number of elements (with and without overlap) | // local number of elements (with and without overlap) |
135 | m_NE0 = m_ownNE0 = (m_NX>1 ? (n0+1)/m_NX : n0); | m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0); |
136 | if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) | if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1) |
137 | m_NE0++; | m_NE[0]++; |
138 | else if (m_NX>1 && m_mpiInfo->rank%m_NX==m_NX-1) | else if (d0>1 && m_mpiInfo->rank%d0==d0-1) |
139 | m_ownNE0--; | m_ownNE[0]--; |
140 | ||
141 | m_NE1 = m_ownNE1 = (m_NY>1 ? (n1+1)/m_NY : n1); | m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1); |
142 | if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) | if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1) |
143 | m_NE1++; | m_NE[1]++; |
144 | else if (m_NY>1 && m_mpiInfo->rank/m_NX==m_NY-1) | else if (d1>1 && m_mpiInfo->rank/d0==d1-1) |
145 | m_ownNE1--; | m_ownNE[1]--; |
146 | ||
147 | // local number of nodes | // local number of nodes |
148 | m_N0 = m_NE0+1; | m_NN[0] = m_NE[0]+1; |
149 | m_N1 = m_NE1+1; | m_NN[1] = m_NE[1]+1; |
150 | ||
151 | // bottom-left node is at (offset0,offset1) in global mesh | // bottom-left node is at (offset0,offset1) in global mesh |
152 | m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); | m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0); |
153 | if (m_offset0 > 0) | if (m_offset[0] > 0) |
154 | m_offset0--; | m_offset[0]--; |
155 | m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); | m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0); |
156 | if (m_offset1 > 0) | if (m_offset[1] > 0) |
157 | m_offset1--; | m_offset[1]--; |
158 | ||
159 | populateSampleIds(); | populateSampleIds(); |
160 | createPattern(); | createPattern(); |
161 | assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN); | |
162 | for (map<string, int>::const_iterator i = tagnamestonums.begin(); | |
163 | i != tagnamestonums.end(); i++) { | |
164 | setTagMap(i->first, i->second); | |
165 | } | |
166 | addPoints(tags.size(), &points[0], &tags[0]); | |
167 | } | } |
168 | ||
169 | Rectangle::~Rectangle() | Rectangle::~Rectangle() |
170 | { | { |
171 | Paso_SystemMatrixPattern_free(m_pattern); | Paso_SystemMatrixPattern_free(m_pattern); |
172 | Paso_Connector_free(m_connector); | Paso_Connector_free(m_connector); |
173 | delete assembler; | |
174 | } | } |
175 | ||
176 | string Rectangle::getDescription() const | string Rectangle::getDescription() const |
# | Line 97 bool Rectangle::operator==(const Abstrac | Line 183 bool Rectangle::operator==(const Abstrac |
183 | const Rectangle* o=dynamic_cast<const Rectangle*>(&other); | const Rectangle* o=dynamic_cast<const Rectangle*>(&other); |
184 | if (o) { | if (o) { |
185 | return (RipleyDomain::operator==(other) && | return (RipleyDomain::operator==(other) && |
186 | m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 | m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] |
187 | && m_x0==o->m_x0 && m_y0==o->m_y0 | && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] |
188 | && m_l0==o->m_l0 && m_l1==o->m_l1 | && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] |
189 | && m_NX==o->m_NX && m_NY==o->m_NY); | && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]); |
190 | } | } |
191 | ||
192 | return false; | return false; |
193 | } | } |
194 | ||
195 | void Rectangle::readNcGrid(escript::Data& out, string filename, string varname, | |
196 | const ReaderParameters& params) const | |
197 | { | |
198 | #ifdef USE_NETCDF | |
199 | // check destination function space | |
200 | int myN0, myN1; | |
201 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | |
202 | myN0 = m_NN[0]; | |
203 | myN1 = m_NN[1]; | |
204 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | |
205 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
206 | myN0 = m_NE[0]; | |
207 | myN1 = m_NE[1]; | |
208 | } else | |
209 | throw RipleyException("readNcGrid(): invalid function space for output data object"); | |
210 | ||
211 | if (params.first.size() != 2) | |
212 | throw RipleyException("readNcGrid(): argument 'first' must have 2 entries"); | |
213 | ||
214 | if (params.numValues.size() != 2) | |
215 | throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries"); | |
216 | ||
217 | if (params.multiplier.size() != 2) | |
218 | throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries"); | |
219 | for (size_t i=0; i<params.multiplier.size(); i++) | |
220 | if (params.multiplier[i]<1) | |
221 | throw RipleyException("readNcGrid(): all multipliers must be positive"); | |
222 | if (params.reverse.size() != 2) | |
223 | throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries"); | |
224 | ||
225 | // check file existence and size | |
226 | NcFile f(filename.c_str(), NcFile::ReadOnly); | |
227 | if (!f.is_valid()) | |
228 | throw RipleyException("readNcGrid(): cannot open file"); | |
229 | ||
230 | NcVar* var = f.get_var(varname.c_str()); | |
231 | if (!var) | |
232 | throw RipleyException("readNcGrid(): invalid variable"); | |
233 | ||
234 | // TODO: rank>0 data support | |
235 | const int numComp = out.getDataPointSize(); | |
236 | if (numComp > 1) | |
237 | throw RipleyException("readNcGrid(): only scalar data supported"); | |
238 | ||
239 | const int dims = var->num_dims(); | |
240 | boost::scoped_array<long> edges(var->edges()); | |
241 | ||
242 | // is this a slice of the data object (dims!=2)? | |
243 | // note the expected ordering of edges (as in numpy: y,x) | |
244 | if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1])) | |
245 | || (dims==1 && params.numValues[1]>1) ) { | |
246 | throw RipleyException("readNcGrid(): not enough data in file"); | |
247 | } | |
248 | ||
249 | // check if this rank contributes anything | |
250 | if (params.first[0] >= m_offset[0]+myN0 || | |
251 | params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] || | |
252 | params.first[1] >= m_offset[1]+myN1 || | |
253 | params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1]) | |
254 | return; | |
255 | ||
256 | // now determine how much this rank has to write | |
257 | ||
258 | // first coordinates in data object to write to | |
259 | const int first0 = max(0, params.first[0]-m_offset[0]); | |
260 | const int first1 = max(0, params.first[1]-m_offset[1]); | |
261 | // indices to first value in file (not accounting for reverse yet) | |
262 | int idx0 = max(0, m_offset[0]-params.first[0]); | |
263 | int idx1 = max(0, m_offset[1]-params.first[1]); | |
264 | // number of values to read | |
265 | const int num0 = min(params.numValues[0]-idx0, myN0-first0); | |
266 | const int num1 = min(params.numValues[1]-idx1, myN1-first1); | |
267 | ||
268 | // make sure we read the right block if going backwards through file | |
269 | if (params.reverse[0]) | |
270 | idx0 = edges[dims-1]-num0-idx0; | |
271 | if (dims>1 && params.reverse[1]) | |
272 | idx1 = edges[dims-2]-num1-idx1; | |
273 | ||
274 | vector<double> values(num0*num1); | |
275 | if (dims==2) { | |
276 | var->set_cur(idx1, idx0); | |
277 | var->get(&values[0], num1, num0); | |
278 | } else { | |
279 | var->set_cur(idx0); | |
280 | var->get(&values[0], num0); | |
281 | } | |
282 | ||
283 | const int dpp = out.getNumDataPointsPerSample(); | |
284 | out.requireWrite(); | |
285 | ||
286 | // helpers for reversing | |
287 | const int x0 = (params.reverse[0] ? num0-1 : 0); | |
288 | const int x_mult = (params.reverse[0] ? -1 : 1); | |
289 | const int y0 = (params.reverse[1] ? num1-1 : 0); | |
290 | const int y_mult = (params.reverse[1] ? -1 : 1); | |
291 | ||
292 | for (index_t y=0; y<num1; y++) { | |
293 | #pragma omp parallel for | |
294 | for (index_t x=0; x<num0; x++) { | |
295 | const int baseIndex = first0+x*params.multiplier[0] | |
296 | +(first1+y*params.multiplier[1])*myN0; | |
297 | const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x); | |
298 | if (!isnan(values[srcIndex])) { | |
299 | for (index_t m1=0; m1<params.multiplier[1]; m1++) { | |
300 | for (index_t m0=0; m0<params.multiplier[0]; m0++) { | |
301 | const int dataIndex = baseIndex+m0+m1*myN0; | |
302 | double* dest = out.getSampleDataRW(dataIndex); | |
303 | for (index_t q=0; q<dpp; q++) { | |
304 | *dest++ = values[srcIndex]; | |
305 | } | |
306 | } | |
307 | } | |
308 | } | |
309 | } | |
310 | } | |
311 | #else | |
312 | throw RipleyException("readNcGrid(): not compiled with netCDF support"); | |
313 | #endif | |
314 | } | |
315 | ||
316 | void Rectangle::readBinaryGrid(escript::Data& out, string filename, | |
317 | const ReaderParameters& params) const | |
318 | { | |
319 | // the mapping is not universally correct but should work on our | |
320 | // supported platforms | |
321 | switch (params.dataType) { | |
322 | case DATATYPE_INT32: | |
323 | readBinaryGridImpl<int>(out, filename, params); | |
324 | break; | |
325 | case DATATYPE_FLOAT32: | |
326 | readBinaryGridImpl<float>(out, filename, params); | |
327 | break; | |
328 | case DATATYPE_FLOAT64: | |
329 | readBinaryGridImpl<double>(out, filename, params); | |
330 | break; | |
331 | default: | |
332 | throw RipleyException("readBinaryGrid(): invalid or unsupported datatype"); | |
333 | } | |
334 | } | |
335 | ||
336 | #ifdef USE_BOOSTIO | |
337 | void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename, | |
338 | const ReaderParameters& params) const | |
339 | { | |
340 | // the mapping is not universally correct but should work on our | |
341 | // supported platforms | |
342 | switch (params.dataType) { | |
343 | case DATATYPE_INT32: | |
344 | readBinaryGridZippedImpl<int>(out, filename, params); | |
345 | break; | |
346 | case DATATYPE_FLOAT32: | |
347 | readBinaryGridZippedImpl<float>(out, filename, params); | |
348 | break; | |
349 | case DATATYPE_FLOAT64: | |
350 | readBinaryGridZippedImpl<double>(out, filename, params); | |
351 | break; | |
352 | default: | |
353 | throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype"); | |
354 | } | |
355 | } | |
356 | #endif | |
357 | ||
358 | template<typename ValueType> | |
359 | void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename, | |
360 | const ReaderParameters& params) const | |
361 | { | |
362 | // check destination function space | |
363 | int myN0, myN1; | |
364 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | |
365 | myN0 = m_NN[0]; | |
366 | myN1 = m_NN[1]; | |
367 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | |
368 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
369 | myN0 = m_NE[0]; | |
370 | myN1 = m_NE[1]; | |
371 | } else | |
372 | throw RipleyException("readBinaryGrid(): invalid function space for output data object"); | |
373 | ||
374 | // check file existence and size | |
375 | ifstream f(filename.c_str(), ifstream::binary); | |
376 | if (f.fail()) { | |
377 | throw RipleyException("readBinaryGrid(): cannot open file"); | |
378 | } | |
379 | f.seekg(0, ios::end); | |
380 | const int numComp = out.getDataPointSize(); | |
381 | const int filesize = f.tellg(); | |
382 | const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); | |
383 | if (filesize < reqsize) { | |
384 | f.close(); | |
385 | throw RipleyException("readBinaryGrid(): not enough data in file"); | |
386 | } | |
387 | ||
388 | // check if this rank contributes anything | |
389 | if (params.first[0] >= m_offset[0]+myN0 || | |
390 | params.first[0]+params.numValues[0] <= m_offset[0] || | |
391 | params.first[1] >= m_offset[1]+myN1 || | |
392 | params.first[1]+params.numValues[1] <= m_offset[1]) { | |
393 | f.close(); | |
394 | return; | |
395 | } | |
396 | ||
397 | // now determine how much this rank has to write | |
398 | ||
399 | // first coordinates in data object to write to | |
400 | const int first0 = max(0, params.first[0]-m_offset[0]); | |
401 | const int first1 = max(0, params.first[1]-m_offset[1]); | |
402 | // indices to first value in file | |
403 | const int idx0 = max(0, m_offset[0]-params.first[0]); | |
404 | const int idx1 = max(0, m_offset[1]-params.first[1]); | |
405 | // number of values to read | |
406 | const int num0 = min(params.numValues[0]-idx0, myN0-first0); | |
407 | const int num1 = min(params.numValues[1]-idx1, myN1-first1); | |
408 | ||
409 | out.requireWrite(); | |
410 | vector<ValueType> values(num0*numComp); | |
411 | const int dpp = out.getNumDataPointsPerSample(); | |
412 | ||
413 | for (int y=0; y<num1; y++) { | |
414 | const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); | |
415 | f.seekg(fileofs*sizeof(ValueType)); | |
416 | f.read((char*)&values[0], num0*numComp*sizeof(ValueType)); | |
417 | for (int x=0; x<num0; x++) { | |
418 | const int baseIndex = first0+x*params.multiplier[0] | |
419 | +(first1+y*params.multiplier[1])*myN0; | |
420 | for (int m1=0; m1<params.multiplier[1]; m1++) { | |
421 | for (int m0=0; m0<params.multiplier[0]; m0++) { | |
422 | const int dataIndex = baseIndex+m0+m1*myN0; | |
423 | double* dest = out.getSampleDataRW(dataIndex); | |
424 | for (int c=0; c<numComp; c++) { | |
425 | ValueType val = values[x*numComp+c]; | |
426 | ||
427 | if (params.byteOrder != BYTEORDER_NATIVE) { | |
428 | char* cval = reinterpret_cast<char*>(&val); | |
429 | // this will alter val!! | |
430 | byte_swap32(cval); | |
431 | } | |
432 | if (!std::isnan(val)) { | |
433 | for (int q=0; q<dpp; q++) { | |
434 | *dest++ = static_cast<double>(val); | |
435 | } | |
436 | } | |
437 | } | |
438 | } | |
439 | } | |
440 | } | |
441 | } | |
442 | ||
443 | f.close(); | |
444 | } | |
445 | ||
446 | #ifdef USE_BOOSTIO | |
447 | template<typename ValueType> | |
448 | void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename, | |
449 | const ReaderParameters& params) const | |
450 | { | |
451 | // check destination function space | |
452 | int myN0, myN1; | |
453 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | |
454 | myN0 = m_NN[0]; | |
455 | myN1 = m_NN[1]; | |
456 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | |
457 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
458 | myN0 = m_NE[0]; | |
459 | myN1 = m_NE[1]; | |
460 | } else | |
461 | throw RipleyException("readBinaryGrid(): invalid function space for output data object"); | |
462 | ||
463 | // check file existence and size | |
464 | ifstream f(filename.c_str(), ifstream::binary); | |
465 | if (f.fail()) { | |
466 | throw RipleyException("readBinaryGridFromZipped(): cannot open file"); | |
467 | } | |
468 | f.seekg(0, ios::end); | |
469 | const int numComp = out.getDataPointSize(); | |
470 | int filesize = f.tellg(); | |
471 | f.seekg(0, ios::beg); | |
472 | std::vector<char> compressed(filesize); | |
473 | f.read((char*)&compressed[0], filesize); | |
474 | f.close(); | |
475 | std::vector<char> decompressed = unzip(compressed); | |
476 | filesize = decompressed.size(); | |
477 | const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); | |
478 | if (filesize < reqsize) { | |
479 | throw RipleyException("readBinaryGridFromZipped(): not enough data in file"); | |
480 | } | |
481 | ||
482 | // check if this rank contributes anything | |
483 | if (params.first[0] >= m_offset[0]+myN0 || | |
484 | params.first[0]+params.numValues[0] <= m_offset[0] || | |
485 | params.first[1] >= m_offset[1]+myN1 || | |
486 | params.first[1]+params.numValues[1] <= m_offset[1]) { | |
487 | f.close(); | |
488 | return; | |
489 | } | |
490 | ||
491 | // now determine how much this rank has to write | |
492 | ||
493 | // first coordinates in data object to write to | |
494 | const int first0 = max(0, params.first[0]-m_offset[0]); | |
495 | const int first1 = max(0, params.first[1]-m_offset[1]); | |
496 | // indices to first value in file | |
497 | const int idx0 = max(0, m_offset[0]-params.first[0]); | |
498 | const int idx1 = max(0, m_offset[1]-params.first[1]); | |
499 | // number of values to read | |
500 | const int num0 = min(params.numValues[0]-idx0, myN0-first0); | |
501 | const int num1 = min(params.numValues[1]-idx1, myN1-first1); | |
502 | ||
503 | out.requireWrite(); | |
504 | vector<ValueType> values(num0*numComp); | |
505 | const int dpp = out.getNumDataPointsPerSample(); | |
506 | ||
507 | for (int y=0; y<num1; y++) { | |
508 | const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); | |
509 | memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType)); | |
510 | for (int x=0; x<num0; x++) { | |
511 | const int baseIndex = first0+x*params.multiplier[0] | |
512 | +(first1+y*params.multiplier[1])*myN0; | |
513 | for (int m1=0; m1<params.multiplier[1]; m1++) { | |
514 | for (int m0=0; m0<params.multiplier[0]; m0++) { | |
515 | const int dataIndex = baseIndex+m0+m1*myN0; | |
516 | double* dest = out.getSampleDataRW(dataIndex); | |
517 | for (int c=0; c<numComp; c++) { | |
518 | ValueType val = values[x*numComp+c]; | |
519 | ||
520 | if (params.byteOrder != BYTEORDER_NATIVE) { | |
521 | char* cval = reinterpret_cast<char*>(&val); | |
522 | // this will alter val!! | |
523 | byte_swap32(cval); | |
524 | } | |
525 | if (!std::isnan(val)) { | |
526 | for (int q=0; q<dpp; q++) { | |
527 | *dest++ = static_cast<double>(val); | |
528 | } | |
529 | } | |
530 | } | |
531 | } | |
532 | } | |
533 | } | |
534 | } | |
535 | ||
536 | f.close(); | |
537 | } | |
538 | #endif | |
539 | ||
540 | void Rectangle::writeBinaryGrid(const escript::Data& in, string filename, | |
541 | int byteOrder, int dataType) const | |
542 | { | |
543 | // the mapping is not universally correct but should work on our | |
544 | // supported platforms | |
545 | switch (dataType) { | |
546 | case DATATYPE_INT32: | |
547 | writeBinaryGridImpl<int>(in, filename, byteOrder); | |
548 | break; | |
549 | case DATATYPE_FLOAT32: | |
550 | writeBinaryGridImpl<float>(in, filename, byteOrder); | |
551 | break; | |
552 | case DATATYPE_FLOAT64: | |
553 | writeBinaryGridImpl<double>(in, filename, byteOrder); | |
554 | break; | |
555 | default: | |
556 | throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype"); | |
557 | } | |
558 | } | |
559 | ||
560 | template<typename ValueType> | |
561 | void Rectangle::writeBinaryGridImpl(const escript::Data& in, | |
562 | const string& filename, int byteOrder) const | |
563 | { | |
564 | // check function space and determine number of points | |
565 | int myN0, myN1; | |
566 | int totalN0, totalN1; | |
567 | if (in.getFunctionSpace().getTypeCode() == Nodes) { | |
568 | myN0 = m_NN[0]; | |
569 | myN1 = m_NN[1]; | |
570 | totalN0 = m_gNE[0]+1; | |
571 | totalN1 = m_gNE[1]+1; | |
572 | } else if (in.getFunctionSpace().getTypeCode() == Elements || | |
573 | in.getFunctionSpace().getTypeCode() == ReducedElements) { | |
574 | myN0 = m_NE[0]; | |
575 | myN1 = m_NE[1]; | |
576 | totalN0 = m_gNE[0]; | |
577 | totalN1 = m_gNE[1]; | |
578 | } else | |
579 | throw RipleyException("writeBinaryGrid(): invalid function space of data object"); | |
580 | ||
581 | const int numComp = in.getDataPointSize(); | |
582 | const int dpp = in.getNumDataPointsPerSample(); | |
583 | ||
584 | if (numComp > 1 || dpp > 1) | |
585 | throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported"); | |
586 | ||
587 | const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1; | |
588 | ||
589 | // from here on we know that each sample consists of one value | |
590 | FileWriter fw; | |
591 | fw.openFile(filename, fileSize); | |
592 | MPIBarrier(); | |
593 | ||
594 | for (index_t y=0; y<myN1; y++) { | |
595 | const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType); | |
596 | ostringstream oss; | |
597 | ||
598 | for (index_t x=0; x<myN0; x++) { | |
599 | const double* sample = in.getSampleDataRO(y*myN0+x); | |
600 | ValueType fvalue = static_cast<ValueType>(*sample); | |
601 | if (byteOrder == BYTEORDER_NATIVE) { | |
602 | oss.write((char*)&fvalue, sizeof(fvalue)); | |
603 | } else { | |
604 | char* value = reinterpret_cast<char*>(&fvalue); | |
605 | oss.write(byte_swap32(value), sizeof(fvalue)); | |
606 | } | |
607 | } | |
608 | fw.writeAt(oss, fileofs); | |
609 | } | |
610 | fw.close(); | |
611 | } | |
612 | ||
613 | void Rectangle::dump(const string& fileName) const | void Rectangle::dump(const string& fileName) const |
614 | { | { |
615 | #if USE_SILO | #if USE_SILO |
# | Line 114 void Rectangle::dump(const string& fileN | Line 618 void Rectangle::dump(const string& fileN |
618 | fn+=".silo"; | fn+=".silo"; |
619 | } | } |
620 | ||
const int NUM_SILO_FILES = 1; | ||
const char* blockDirFmt = "/block%04d"; | ||
621 | int driver=DB_HDF5; | int driver=DB_HDF5; |
622 | DBfile* dbfile = NULL; | DBfile* dbfile = NULL; |
623 | const char* blockDirFmt = "/block%04d"; | |
624 | ||
625 | #ifdef ESYS_MPI | #ifdef ESYS_MPI |
626 | PMPIO_baton_t* baton = NULL; | PMPIO_baton_t* baton = NULL; |
627 | const int NUM_SILO_FILES = 1; | |
628 | #endif | #endif |
629 | ||
630 | if (m_mpiInfo->size > 1) { | if (m_mpiInfo->size > 1) { |
# | Line 168 void Rectangle::dump(const string& fileN | Line 672 void Rectangle::dump(const string& fileN |
672 | } | } |
673 | */ | */ |
674 | ||
675 | boost::scoped_ptr<double> x(new double[m_N0]); | boost::scoped_ptr<double> x(new double[m_NN[0]]); |
676 | boost::scoped_ptr<double> y(new double[m_N1]); | boost::scoped_ptr<double> y(new double[m_NN[1]]); |
677 | double* coords[2] = { x.get(), y.get() }; | double* coords[2] = { x.get(), y.get() }; |
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
678 | #pragma omp parallel | #pragma omp parallel |
679 | { | { |
680 | #pragma omp for nowait | #pragma omp for nowait |
681 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_NN[0]; i0++) { |
682 | coords[0][i0]=xdx.first+i0*xdx.second; | coords[0][i0]=getLocalCoordinate(i0, 0); |
683 | } | } |
684 | #pragma omp for nowait | #pragma omp for nowait |
685 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_NN[1]; i1++) { |
686 | coords[1][i1]=ydy.first+i1*ydy.second; | coords[1][i1]=getLocalCoordinate(i1, 1); |
687 | } | } |
688 | } | } |
689 | IndexVector dims = getNumNodesPerDim(); | int* dims = const_cast<int*>(getNumNodesPerDim()); |
690 | ||
691 | // write mesh | // write mesh |
692 | DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE, | DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE, |
693 | DB_COLLINEAR, NULL); | DB_COLLINEAR, NULL); |
694 | ||
695 | // write node ids | // write node ids |
696 | DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2, | DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2, |
697 | NULL, 0, DB_INT, DB_NODECENT, NULL); | NULL, 0, DB_INT, DB_NODECENT, NULL); |
698 | ||
699 | // write element ids | // write element ids |
700 | dims = getNumElementsPerDim(); | dims = const_cast<int*>(getNumElementsPerDim()); |
701 | DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], | DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], |
702 | &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); | dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); |
703 | ||
704 | // rank 0 writes multimesh and multivar | // rank 0 writes multimesh and multivar |
705 | if (m_mpiInfo->rank == 0) { | if (m_mpiInfo->rank == 0) { |
# | Line 265 const int* Rectangle::borrowSampleRefere | Line 767 const int* Rectangle::borrowSampleRefere |
767 | case FaceElements: | case FaceElements: |
768 | case ReducedFaceElements: | case ReducedFaceElements: |
769 | return &m_faceId[0]; | return &m_faceId[0]; |
770 | case Points: | |
771 | return &m_diracPointNodeIDs[0]; | |
772 | default: | default: |
773 | break; | break; |
774 | } | } |
# | Line 289 bool Rectangle::ownSample(int fsType, in | Line 793 bool Rectangle::ownSample(int fsType, in |
793 | case Elements: | case Elements: |
794 | case ReducedElements: | case ReducedElements: |
795 | // check ownership of element's bottom left node | // check ownership of element's bottom left node |
796 | return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); | return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF()); |
797 | case FaceElements: | case FaceElements: |
798 | case ReducedFaceElements: | case ReducedFaceElements: |
799 | { | { |
800 | // determine which face the sample belongs to before | // determine which face the sample belongs to before |
801 | // checking ownership of corresponding element's first node | // checking ownership of corresponding element's first node |
const IndexVector faces = getNumFacesPerBoundary(); | ||
802 | dim_t n=0; | dim_t n=0; |
803 | for (size_t i=0; i<faces.size(); i++) { | for (size_t i=0; i<4; i++) { |
804 | n+=faces[i]; | n+=m_faceCount[i]; |
805 | if (id<n) { | if (id<n) { |
806 | index_t k; | index_t k; |
807 | if (i==1) | if (i==1) |
808 | k=m_N0-2; | k=m_NN[0]-2; |
809 | else if (i==3) | else if (i==3) |
810 | k=m_N0*(m_N1-2); | k=m_NN[0]*(m_NN[1]-2); |
811 | else | else |
812 | k=0; | k=0; |
813 | // determine whether to move right or up | // determine whether to move right or up |
814 | const index_t delta=(i/2==0 ? m_N0 : 1); | const index_t delta=(i/2==0 ? m_NN[0] : 1); |
815 | return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); | return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF()); |
816 | } | } |
817 | } | } |
818 | return false; | return false; |
# | Line 331 void Rectangle::setToNormal(escript::Dat | Line 834 void Rectangle::setToNormal(escript::Dat |
834 | { | { |
835 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
836 | #pragma omp for nowait | #pragma omp for nowait |
837 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
838 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
839 | // set vector at two quadrature points | // set vector at two quadrature points |
840 | *o++ = -1.; | *o++ = -1.; |
# | Line 343 void Rectangle::setToNormal(escript::Dat | Line 846 void Rectangle::setToNormal(escript::Dat |
846 | ||
847 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
848 | #pragma omp for nowait | #pragma omp for nowait |
849 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
850 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
851 | // set vector at two quadrature points | // set vector at two quadrature points |
852 | *o++ = 1.; | *o++ = 1.; |
# | Line 355 void Rectangle::setToNormal(escript::Dat | Line 858 void Rectangle::setToNormal(escript::Dat |
858 | ||
859 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
860 | #pragma omp for nowait | #pragma omp for nowait |
861 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
862 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
863 | // set vector at two quadrature points | // set vector at two quadrature points |
864 | *o++ = 0.; | *o++ = 0.; |
# | Line 367 void Rectangle::setToNormal(escript::Dat | Line 870 void Rectangle::setToNormal(escript::Dat |
870 | ||
871 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
872 | #pragma omp for nowait | #pragma omp for nowait |
873 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
874 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
875 | // set vector at two quadrature points | // set vector at two quadrature points |
876 | *o++ = 0.; | *o++ = 0.; |
# | Line 383 void Rectangle::setToNormal(escript::Dat | Line 886 void Rectangle::setToNormal(escript::Dat |
886 | { | { |
887 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
888 | #pragma omp for nowait | #pragma omp for nowait |
889 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
890 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
891 | *o++ = -1.; | *o++ = -1.; |
892 | *o = 0.; | *o = 0.; |
# | Line 392 void Rectangle::setToNormal(escript::Dat | Line 895 void Rectangle::setToNormal(escript::Dat |
895 | ||
896 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
897 | #pragma omp for nowait | #pragma omp for nowait |
898 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
899 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
900 | *o++ = 1.; | *o++ = 1.; |
901 | *o = 0.; | *o = 0.; |
# | Line 401 void Rectangle::setToNormal(escript::Dat | Line 904 void Rectangle::setToNormal(escript::Dat |
904 | ||
905 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
906 | #pragma omp for nowait | #pragma omp for nowait |
907 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
908 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
909 | *o++ = 0.; | *o++ = 0.; |
910 | *o = -1.; | *o = -1.; |
# | Line 410 void Rectangle::setToNormal(escript::Dat | Line 913 void Rectangle::setToNormal(escript::Dat |
913 | ||
914 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
915 | #pragma omp for nowait | #pragma omp for nowait |
916 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
917 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
918 | *o++ = 0.; | *o++ = 0.; |
919 | *o = 1.; | *o = 1.; |
# | Line 432 void Rectangle::setToSize(escript::Data& | Line 935 void Rectangle::setToSize(escript::Data& |
935 | || out.getFunctionSpace().getTypeCode() == ReducedElements) { | || out.getFunctionSpace().getTypeCode() == ReducedElements) { |
936 | out.requireWrite(); | out.requireWrite(); |
937 | const dim_t numQuad=out.getNumDataPointsPerSample(); | const dim_t numQuad=out.getNumDataPointsPerSample(); |
938 | const double hSize=getFirstCoordAndSpacing(0).second; | const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]); |
const double vSize=getFirstCoordAndSpacing(1).second; | ||
const double size=min(hSize,vSize); | ||
939 | #pragma omp parallel for | #pragma omp parallel for |
940 | for (index_t k = 0; k < getNumElements(); ++k) { | for (index_t k = 0; k < getNumElements(); ++k) { |
941 | double* o = out.getSampleDataRW(k); | double* o = out.getSampleDataRW(k); |
# | Line 444 void Rectangle::setToSize(escript::Data& | Line 945 void Rectangle::setToSize(escript::Data& |
945 | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
946 | out.requireWrite(); | out.requireWrite(); |
947 | const dim_t numQuad=out.getNumDataPointsPerSample(); | const dim_t numQuad=out.getNumDataPointsPerSample(); |
const double hSize=getFirstCoordAndSpacing(0).second; | ||
const double vSize=getFirstCoordAndSpacing(1).second; | ||
948 | #pragma omp parallel | #pragma omp parallel |
949 | { | { |
950 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
951 | #pragma omp for nowait | #pragma omp for nowait |
952 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
953 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
954 | fill(o, o+numQuad, vSize); | fill(o, o+numQuad, m_dx[1]); |
955 | } | } |
956 | } | } |
957 | ||
958 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
959 | #pragma omp for nowait | #pragma omp for nowait |
960 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
961 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
962 | fill(o, o+numQuad, vSize); | fill(o, o+numQuad, m_dx[1]); |
963 | } | } |
964 | } | } |
965 | ||
966 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
967 | #pragma omp for nowait | #pragma omp for nowait |
968 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
969 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
970 | fill(o, o+numQuad, hSize); | fill(o, o+numQuad, m_dx[0]); |
971 | } | } |
972 | } | } |
973 | ||
974 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
975 | #pragma omp for nowait | #pragma omp for nowait |
976 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
977 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
978 | fill(o, o+numQuad, hSize); | fill(o, o+numQuad, m_dx[0]); |
979 | } | } |
980 | } | } |
981 | } // end of parallel section | } // end of parallel section |
# | Line 489 void Rectangle::setToSize(escript::Data& | Line 988 void Rectangle::setToSize(escript::Data& |
988 | } | } |
989 | } | } |
990 | ||
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, | ||
bool reducedColOrder) const | ||
{ | ||
/* FIXME: reduced | ||
if (reducedRowOrder || reducedColOrder) | ||
throw RipleyException("getPattern() not implemented for reduced order"); | ||
*/ | ||
return m_pattern; | ||
} | ||
991 | void Rectangle::Print_Mesh_Info(const bool full) const | void Rectangle::Print_Mesh_Info(const bool full) const |
992 | { | { |
993 | RipleyDomain::Print_Mesh_Info(full); | RipleyDomain::Print_Mesh_Info(full); |
# | Line 506 void Rectangle::Print_Mesh_Info(const bo | Line 995 void Rectangle::Print_Mesh_Info(const bo |
995 | cout << " Id Coordinates" << endl; | cout << " Id Coordinates" << endl; |
996 | cout.precision(15); | cout.precision(15); |
997 | cout.setf(ios::scientific, ios::floatfield); | cout.setf(ios::scientific, ios::floatfield); |
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
998 | for (index_t i=0; i < getNumNodes(); i++) { | for (index_t i=0; i < getNumNodes(); i++) { |
999 | cout << " " << setw(5) << m_nodeId[i] | cout << " " << setw(5) << m_nodeId[i] |
1000 | << " " << xdx.first+(i%m_N0)*xdx.second | << " " << getLocalCoordinate(i%m_NN[0], 0) |
1001 | << " " << ydy.first+(i/m_N0)*ydy.second << endl; | << " " << getLocalCoordinate(i/m_NN[0], 1) << endl; |
1002 | } | } |
1003 | } | } |
1004 | } | } |
1005 | ||
IndexVector Rectangle::getNumNodesPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_N0); | ||
ret.push_back(m_N1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumElementsPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_NE0); | ||
ret.push_back(m_NE1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumFacesPerBoundary() const | ||
{ | ||
IndexVector ret(4, 0); | ||
//left | ||
if (m_offset0==0) | ||
ret[0]=m_NE1; | ||
//right | ||
if (m_mpiInfo->rank%m_NX==m_NX-1) | ||
ret[1]=m_NE1; | ||
//bottom | ||
if (m_offset1==0) | ||
ret[2]=m_NE0; | ||
//top | ||
if (m_mpiInfo->rank/m_NX==m_NY-1) | ||
ret[3]=m_NE0; | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumSubdivisionsPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_NX); | ||
ret.push_back(m_NY); | ||
return ret; | ||
} | ||
pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const | ||
{ | ||
if (dim==0) { | ||
return pair<double,double>(m_x0+(m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); | ||
} else if (dim==1) { | ||
return pair<double,double>(m_y0+(m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); | ||
} | ||
throw RipleyException("getFirstCoordAndSpacing: invalid argument"); | ||
} | ||
//protected | ||
dim_t Rectangle::getNumDOF() const | ||
{ | ||
return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; | ||
} | ||
//protected | ||
dim_t Rectangle::getNumFaceElements() const | ||
{ | ||
const IndexVector faces = getNumFacesPerBoundary(); | ||
dim_t n=0; | ||
for (size_t i=0; i<faces.size(); i++) | ||
n+=faces[i]; | ||
return n; | ||
} | ||
1006 | ||
1007 | //protected | //protected |
1008 | void Rectangle::assembleCoordinates(escript::Data& arg) const | void Rectangle::assembleCoordinates(escript::Data& arg) const |
# | Line 594 void Rectangle::assembleCoordinates(escr | Line 1014 void Rectangle::assembleCoordinates(escr |
1014 | if (!numSamplesEqual(&x, 1, getNumNodes())) | if (!numSamplesEqual(&x, 1, getNumNodes())) |
1015 | throw RipleyException("setToX: Illegal number of samples in Data object"); | throw RipleyException("setToX: Illegal number of samples in Data object"); |
1016 | ||
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
1017 | arg.requireWrite(); | arg.requireWrite(); |
1018 | #pragma omp parallel for | #pragma omp parallel for |
1019 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_NN[1]; i1++) { |
1020 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_NN[0]; i0++) { |
1021 | double* point = arg.getSampleDataRW(i0+m_N0*i1); | double* point = arg.getSampleDataRW(i0+m_NN[0]*i1); |
1022 | point[0] = xdx.first+i0*xdx.second; | point[0] = getLocalCoordinate(i0, 0); |
1023 | point[1] = ydy.first+i1*ydy.second; | point[1] = getLocalCoordinate(i1, 1); |
1024 | } | } |
1025 | } | } |
1026 | } | } |
1027 | ||
1028 | //protected | //protected |
1029 | void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const | void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const |
1030 | { | { |
1031 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1032 | const double h0 = m_l0/m_gNE0; | const double cx0 = .21132486540518711775/m_dx[0]; |
1033 | const double h1 = m_l1/m_gNE1; | const double cx1 = .78867513459481288225/m_dx[0]; |
1034 | const double cx0 = -1./h0; | const double cx2 = 1./m_dx[0]; |
1035 | const double cx1 = -.78867513459481288225/h0; | const double cy0 = .21132486540518711775/m_dx[1]; |
1036 | const double cx2 = -.5/h0; | const double cy1 = .78867513459481288225/m_dx[1]; |
1037 | const double cx3 = -.21132486540518711775/h0; | const double cy2 = 1./m_dx[1]; |
const double cx4 = .21132486540518711775/h0; | ||
const double cx5 = .5/h0; | ||
const double cx6 = .78867513459481288225/h0; | ||
const double cx7 = 1./h0; | ||
const double cy0 = -1./h1; | ||
const double cy1 = -.78867513459481288225/h1; | ||
const double cy2 = -.5/h1; | ||
const double cy3 = -.21132486540518711775/h1; | ||
const double cy4 = .21132486540518711775/h1; | ||
const double cy5 = .5/h1; | ||
const double cy6 = .78867513459481288225/h1; | ||
const double cy7 = 1./h1; | ||
1038 | ||
1039 | if (out.getFunctionSpace().getTypeCode() == Elements) { | if (out.getFunctionSpace().getTypeCode() == Elements) { |
1040 | out.requireWrite(); | out.requireWrite(); |
1041 | #pragma omp parallel for | #pragma omp parallel |
1042 | for (index_t k1=0; k1 < m_NE1; ++k1) { | { |
1043 | for (index_t k0=0; k0 < m_NE0; ++k0) { | vector<double> f_00(numComp); |
1044 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | vector<double> f_01(numComp); |
1045 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | vector<double> f_10(numComp); |
1046 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | vector<double> f_11(numComp); |
1047 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | #pragma omp for |
1048 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1049 | for (index_t i=0; i < numComp; ++i) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1050 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1051 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1052 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1053 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1054 | o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1055 | o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | for (index_t i=0; i < numComp; ++i) { |
1056 | o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1057 | o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1058 | } /* end of component loop i */ | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1059 | } /* end of k0 loop */ | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1060 | } /* end of k1 loop */ | o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1061 | o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; | |
1062 | o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; | |
1063 | o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; | |
1064 | } // end of component loop i | |
1065 | } // end of k0 loop | |
1066 | } // end of k1 loop | |
1067 | } // end of parallel section | |
1068 | } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { | } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { |
1069 | out.requireWrite(); | out.requireWrite(); |
1070 | #pragma omp parallel for | #pragma omp parallel |
1071 | for (index_t k1=0; k1 < m_NE1; ++k1) { | { |
1072 | for (index_t k0=0; k0 < m_NE0; ++k0) { | vector<double> f_00(numComp); |
1073 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | vector<double> f_01(numComp); |
1074 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | vector<double> f_10(numComp); |
1075 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | vector<double> f_11(numComp); |
1076 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | #pragma omp for |
1077 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1078 | for (index_t i=0; i < numComp; ++i) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1079 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1080 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1081 | } /* end of component loop i */ | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1082 | } /* end of k0 loop */ | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1083 | } /* end of k1 loop */ | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1084 | for (index_t i=0; i < numComp; ++i) { | |
1085 | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; | |
1086 | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; | |
1087 | } // end of component loop i | |
1088 | } // end of k0 loop | |
1089 | } // end of k1 loop | |
1090 | } // end of parallel section | |
1091 | } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { | } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
1092 | out.requireWrite(); | out.requireWrite(); |
1093 | #pragma omp parallel | #pragma omp parallel |
1094 | { | { |
1095 | vector<double> f_00(numComp); | |
1096 | vector<double> f_01(numComp); | |
1097 | vector<double> f_10(numComp); | |
1098 | vector<double> f_11(numComp); | |
1099 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1100 | #pragma omp for nowait | #pragma omp for nowait |
1101 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1102 | const double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1103 | const double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1104 | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double)); |
1105 | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1106 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1107 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1108 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1109 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1110 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1111 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1112 | } /* end of component loop i */ | } // end of component loop i |
1113 | } /* end of k1 loop */ | } // end of k1 loop |
1114 | } /* end of face 0 */ | } // end of face 0 |
1115 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1116 | #pragma omp for nowait | #pragma omp for nowait |
1117 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1118 | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double)); |
1119 | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double)); |
1120 | const double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1121 | const double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1122 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1123 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1124 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1125 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1126 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1127 | o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1128 | } /* end of component loop i */ | } // end of component loop i |
1129 | } /* end of k1 loop */ | } // end of k1 loop |
1130 | } /* end of face 1 */ | } // end of face 1 |
1131 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1132 | #pragma omp for nowait | #pragma omp for nowait |
1133 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1134 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1135 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double)); |
1136 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1137 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1138 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1139 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1140 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1141 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1142 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1143 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1144 | } /* end of component loop i */ | } // end of component loop i |
1145 | } /* end of k0 loop */ | } // end of k0 loop |
1146 | } /* end of face 2 */ | } // end of face 2 |
1147 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1148 | #pragma omp for nowait | #pragma omp for nowait |
1149 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1150 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1151 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1152 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1153 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1154 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1155 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1156 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1157 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1158 | o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1159 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1160 | } /* end of component loop i */ | } // end of component loop i |
1161 | } /* end of k0 loop */ | } // end of k0 loop |
1162 | } /* end of face 3 */ | } // end of face 3 |
1163 | } // end of parallel section | } // end of parallel section |
1164 | ||
1165 | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
1166 | out.requireWrite(); | out.requireWrite(); |
1167 | #pragma omp parallel | #pragma omp parallel |
1168 | { | { |
1169 | vector<double> f_00(numComp); | |
1170 | vector<double> f_01(numComp); | |
1171 | vector<double> f_10(numComp); | |
1172 | vector<double> f_11(numComp); | |
1173 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1174 | #pragma omp for nowait | #pragma omp for nowait |
1175 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1176 | const double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1177 | const double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1178 | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double)); |
1179 | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1180 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1181 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1182 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1183 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1184 | } /* end of component loop i */ | } // end of component loop i |
1185 | } /* end of k1 loop */ | } // end of k1 loop |
1186 | } /* end of face 0 */ | } // end of face 0 |
1187 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1188 | #pragma omp for nowait | #pragma omp for nowait |
1189 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1190 | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double)); |
1191 | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double)); |
1192 | const double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1193 | const double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1194 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1195 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1196 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1197 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1198 | } /* end of component loop i */ | } // end of component loop i |
1199 | } /* end of k1 loop */ | } // end of k1 loop |
1200 | } /* end of face 1 */ | } // end of face 1 |
1201 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1202 | #pragma omp for nowait | #pragma omp for nowait |
1203 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1204 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1205 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double)); |
1206 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1207 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1208 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1209 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1210 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1211 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1212 | } /* end of component loop i */ | } // end of component loop i |
1213 | } /* end of k0 loop */ | } // end of k0 loop |
1214 | } /* end of face 2 */ | } // end of face 2 |
1215 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1216 | #pragma omp for nowait | #pragma omp for nowait |
1217 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1218 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1219 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1220 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1221 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1222 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1223 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1224 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1225 | o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]); | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1226 | } /* end of component loop i */ | } // end of component loop i |
1227 | } /* end of k0 loop */ | } // end of k0 loop |
1228 | } /* end of face 3 */ | } // end of face 3 |
1229 | } // end of parallel section | } // end of parallel section |
1230 | } | } |
1231 | } | } |
1232 | ||
1233 | //protected | //protected |
1234 | void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const | void Rectangle::assembleIntegrate(vector<double>& integrals, |
1235 | const escript::Data& arg) const | |
1236 | { | { |
1237 | const dim_t numComp = arg.getDataPointSize(); | const dim_t numComp = arg.getDataPointSize(); |
1238 | const double h0 = m_l0/m_gNE0; | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1239 | const double h1 = m_l1/m_gNE1; | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
1240 | const index_t left = (m_offset0==0 ? 0 : 1); | const int fs=arg.getFunctionSpace().getTypeCode(); |
1241 | const index_t bottom = (m_offset1==0 ? 0 : 1); | if (fs == Elements && arg.actsExpanded()) { |
if (arg.getFunctionSpace().getTypeCode() == Elements) { | ||
const double w = h0*h1/4.; | ||
1242 | #pragma omp parallel | #pragma omp parallel |
1243 | { | { |
1244 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1245 | const double w = m_dx[0]*m_dx[1]/4.; | |
1246 | #pragma omp for nowait | #pragma omp for nowait |
1247 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1248 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1249 | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0])); |
1250 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1251 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1252 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
1253 | const double f2 = f[INDEX2(i,2,numComp)]; | const double f2 = f[INDEX2(i,2,numComp)]; |
1254 | const double f3 = f[INDEX2(i,3,numComp)]; | const double f3 = f[INDEX2(i,3,numComp)]; |
1255 | int_local[i]+=(f0+f1+f2+f3)*w; | int_local[i]+=(f0+f1+f2+f3)*w; |
1256 | } /* end of component loop i */ | } // end of component loop i |
1257 | } /* end of k0 loop */ | } // end of k0 loop |
1258 | } /* end of k1 loop */ | } // end of k1 loop |
1259 | #pragma omp critical | #pragma omp critical |
1260 | for (index_t i=0; i<numComp; i++) | for (index_t i=0; i<numComp; i++) |
1261 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
1262 | } // end of parallel section | } // end of parallel section |
1263 | } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) { | |
1264 | const double w = h0*h1; | } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) { |
1265 | const double w = m_dx[0]*m_dx[1]; | |
1266 | #pragma omp parallel | #pragma omp parallel |
1267 | { | { |
1268 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1269 | #pragma omp for nowait | #pragma omp for nowait |
1270 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1271 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1272 | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0])); |
1273 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1274 | int_local[i]+=f[i]*w; | int_local[i]+=f[i]*w; |
1275 | } /* end of component loop i */ | } |
1276 | } /* end of k0 loop */ | } |
1277 | } /* end of k1 loop */ | } |
1278 | #pragma omp critical | #pragma omp critical |
1279 | for (index_t i=0; i<numComp; i++) | for (index_t i=0; i<numComp; i++) |
1280 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
1281 | } // end of parallel section | } // end of parallel section |
1282 | } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) { | |
1283 | const double w0 = h0/2.; | } else if (fs == FaceElements && arg.actsExpanded()) { |
const double w1 = h1/2.; | ||
1284 | #pragma omp parallel | #pragma omp parallel |
1285 | { | { |
1286 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1287 | const double w0 = m_dx[0]/2.; | |
1288 | const double w1 = m_dx[1]/2.; | |
1289 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1290 | #pragma omp for nowait | #pragma omp for nowait |
1291 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1292 | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
1293 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1294 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1295 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
1296 | int_local[i]+=(f0+f1)*w1; | int_local[i]+=(f0+f1)*w1; |
1297 | } /* end of component loop i */ | } // end of component loop i |
1298 | } /* end of k1 loop */ | } // end of k1 loop |
1299 | } | } |
1300 | ||
1301 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1302 | #pragma omp for nowait | #pragma omp for nowait |
1303 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1304 | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
1305 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1306 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1307 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
1308 | int_local[i]+=(f0+f1)*w1; | int_local[i]+=(f0+f1)*w1; |
1309 | } /* end of component loop i */ | } // end of component loop i |
1310 | } /* end of k1 loop */ | } // end of k1 loop |
1311 | } | } |
1312 | ||
1313 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1314 | #pragma omp for nowait | #pragma omp for nowait |
1315 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1316 | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
1317 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1318 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1319 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
1320 | int_local[i]+=(f0+f1)*w0; | int_local[i]+=(f0+f1)*w0; |
1321 | } /* end of component loop i */ | } // end of component loop i |
1322 | } /* end of k0 loop */ | } // end of k0 loop |
1323 | } | } |
1324 | ||
1325 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1326 | #pragma omp for nowait | #pragma omp for nowait |
1327 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1328 | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
1329 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1330 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1331 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
1332 | int_local[i]+=(f0+f1)*w0; | int_local[i]+=(f0+f1)*w0; |
1333 | } /* end of component loop i */ | } // end of component loop i |
1334 | } /* end of k0 loop */ | } // end of k0 loop |
1335 | } | } |
1336 | #pragma omp critical | #pragma omp critical |
1337 | for (index_t i=0; i<numComp; i++) | for (index_t i=0; i<numComp; i++) |
1338 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
1339 | } // end of parallel section | } // end of parallel section |
1340 | } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
1341 | } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) { | |
1342 | #pragma omp parallel | #pragma omp parallel |
1343 | { | { |
1344 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1345 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1346 | #pragma omp for nowait | #pragma omp for nowait |
1347 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1348 | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
1349 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1350 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*m_dx[1]; |
1351 | } /* end of component loop i */ | } |
1352 | } /* end of k1 loop */ | } |
1353 | } | } |
1354 | ||
1355 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1356 | #pragma omp for nowait | #pragma omp for nowait |
1357 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1358 | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
1359 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1360 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*m_dx[1]; |
1361 | } /* end of component loop i */ | } |
1362 | } /* end of k1 loop */ | } |
1363 | } | } |
1364 | ||
1365 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1366 | #pragma omp for nowait | #pragma omp for nowait |
1367 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1368 | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
1369 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1370 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*m_dx[0]; |
1371 | } /* end of component loop i */ | } |
1372 | } /* end of k0 loop */ | } |
1373 | } | } |
1374 | ||
1375 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1376 | #pragma omp for nowait | #pragma omp for nowait |
1377 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1378 | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
1379 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1380 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*m_dx[0]; |
1381 | } /* end of component loop i */ | } |
1382 | } /* end of k0 loop */ | } |
1383 | } | } |
1384 | ||
1385 | #pragma omp critical | #pragma omp critical |
1386 | for (index_t i=0; i<numComp; i++) | for (index_t i=0; i<numComp; i++) |
1387 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
1388 | } // end of parallel section | } // end of parallel section |
1389 | } | } // function space selector |
1390 | } | } |
1391 | ||
1392 | //protected | //protected |
1393 | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const |
1394 | { | { |
1395 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1396 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1397 | const int x=node%nDOF0; | const int x=node%nDOF0; |
1398 | const int y=node/nDOF0; | const int y=node/nDOF0; |
1399 | dim_t num=0; | dim_t num=0; |
# | Line 989 dim_t Rectangle::insertNeighbourNodes(In | Line 1418 dim_t Rectangle::insertNeighbourNodes(In |
1418 | } | } |
1419 | ||
1420 | //protected | //protected |
1421 | void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const | void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const |
1422 | { | { |
1423 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1424 | out.requireWrite(); | out.requireWrite(); |
1425 | ||
1426 | const index_t left = (m_offset0==0 ? 0 : 1); | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1427 | const index_t bottom = (m_offset1==0 ? 0 : 1); | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
1428 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1429 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1430 | #pragma omp parallel for | #pragma omp parallel for |
1431 | for (index_t i=0; i<nDOF1; i++) { | for (index_t i=0; i<nDOF1; i++) { |
1432 | for (index_t j=0; j<nDOF0; j++) { | for (index_t j=0; j<nDOF0; j++) { |
1433 | const index_t n=j+left+(i+bottom)*m_N0; | const index_t n=j+left+(i+bottom)*m_NN[0]; |
1434 | const double* src=in.getSampleDataRO(n); | const double* src=in.getSampleDataRO(n); |
1435 | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); |
1436 | } | } |
# | Line 1009 void Rectangle::nodesToDOF(escript::Data | Line 1438 void Rectangle::nodesToDOF(escript::Data |
1438 | } | } |
1439 | ||
1440 | //protected | //protected |
1441 | void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const | void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const |
1442 | { | { |
1443 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1444 | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
1445 | in.requireWrite(); | // expand data object if necessary to be able to grab the whole data |
1446 | Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); | const_cast<escript::Data*>(&in)->expand(); |
1447 | Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0)); | |
1448 | ||
1449 | const dim_t numDOF = getNumDOF(); | const dim_t numDOF = getNumDOF(); |
1450 | out.requireWrite(); | out.requireWrite(); |
# | Line 1027 void Rectangle::dofToNodes(escript::Data | Line 1457 void Rectangle::dofToNodes(escript::Data |
1457 | : &buffer[(m_dofMap[i]-numDOF)*numComp]); | : &buffer[(m_dofMap[i]-numDOF)*numComp]); |
1458 | copy(src, src+numComp, out.getSampleDataRW(i)); | copy(src, src+numComp, out.getSampleDataRW(i)); |
1459 | } | } |
1460 | Paso_Coupler_free(coupler); | |
1461 | } | } |
1462 | ||
1463 | //private | //private |
1464 | void Rectangle::populateSampleIds() | void Rectangle::populateSampleIds() |
1465 | { | { |
1466 | // identifiers are ordered from left to right, bottom to top globablly. | // degrees of freedom are numbered from left to right, bottom to top in |
1467 | // each rank, continuing on the next rank (ranks also go left-right, | |
1468 | // bottom-top). | |
1469 | // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which | |
1470 | // helps when writing out data rank after rank. | |
1471 | ||
1472 | // build node distribution vector first. | // build node distribution vector first. |
1473 | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is |
1474 | // constant for all ranks in this implementation | |
1475 | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); |
1476 | const dim_t numDOF=getNumDOF(); | const dim_t numDOF=getNumDOF(); |
1477 | for (dim_t k=1; k<m_mpiInfo->size; k++) { | for (dim_t k=1; k<m_mpiInfo->size; k++) { |
# | Line 1045 void Rectangle::populateSampleIds() | Line 1481 void Rectangle::populateSampleIds() |
1481 | m_nodeId.resize(getNumNodes()); | m_nodeId.resize(getNumNodes()); |
1482 | m_dofId.resize(numDOF); | m_dofId.resize(numDOF); |
1483 | m_elementId.resize(getNumElements()); | m_elementId.resize(getNumElements()); |
1484 | ||
1485 | // populate face element counts | |
1486 | //left | |
1487 | if (m_offset[0]==0) | |
1488 | m_faceCount[0]=m_NE[1]; | |
1489 | else | |
1490 | m_faceCount[0]=0; | |
1491 | //right | |
1492 | if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1) | |
1493 | m_faceCount[1]=m_NE[1]; | |
1494 | else | |
1495 | m_faceCount[1]=0; | |
1496 | //bottom | |
1497 | if (m_offset[1]==0) | |
1498 | m_faceCount[2]=m_NE[0]; | |
1499 | else | |
1500 | m_faceCount[2]=0; | |
1501 | //top | |
1502 | if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1) | |
1503 | m_faceCount[3]=m_NE[0]; | |
1504 | else | |
1505 | m_faceCount[3]=0; | |
1506 | ||
1507 | m_faceId.resize(getNumFaceElements()); | m_faceId.resize(getNumFaceElements()); |
1508 | ||
1509 | const index_t left = (m_offset[0]==0 ? 0 : 1); | |
1510 | const index_t bottom = (m_offset[1]==0 ? 0 : 1); | |
1511 | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; | |
1512 | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; | |
1513 | ||
1514 | #define globalNodeId(x,y) \ | |
1515 | ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \ | |
1516 | + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0 | |
1517 | ||
1518 | // set corner id's outside the parallel region | |
1519 | m_nodeId[0] = globalNodeId(0, 0); | |
1520 | m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0); | |
1521 | m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1); | |
1522 | m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1); | |
1523 | #undef globalNodeId | |
1524 | ||
1525 | #pragma omp parallel | #pragma omp parallel |
1526 | { | { |
1527 | // nodes | // populate degrees of freedom and own nodes (identical id) |
1528 | #pragma omp for nowait | #pragma omp for nowait |
1529 | for (dim_t i1=0; i1<m_N1; i1++) { | for (dim_t i=0; i<nDOF1; i++) { |
1530 | for (dim_t i0=0; i0<m_N0; i0++) { | for (dim_t j=0; j<nDOF0; j++) { |
1531 | m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; | const index_t nodeIdx=j+left+(i+bottom)*m_NN[0]; |
1532 | const index_t dofIdx=j+i*nDOF0; | |
1533 | m_dofId[dofIdx] = m_nodeId[nodeIdx] | |
1534 | = m_nodeDistribution[m_mpiInfo->rank]+dofIdx; | |
1535 | } | } |
1536 | } | } |
1537 | ||
1538 | // degrees of freedom | // populate the rest of the nodes (shared with other ranks) |
1539 | if (m_faceCount[0]==0) { // left column | |
1540 | #pragma omp for nowait | |
1541 | for (dim_t i=0; i<nDOF1; i++) { | |
1542 | const index_t nodeIdx=(i+bottom)*m_NN[0]; | |
1543 | const index_t dofId=(i+1)*nDOF0-1; | |
1544 | m_nodeId[nodeIdx] | |
1545 | = m_nodeDistribution[m_mpiInfo->rank-1]+dofId; | |
1546 | } | |
1547 | } | |
1548 | if (m_faceCount[1]==0) { // right column | |
1549 | #pragma omp for nowait | #pragma omp for nowait |
1550 | for (dim_t k=0; k<numDOF; k++) | for (dim_t i=0; i<nDOF1; i++) { |
1551 | m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; | const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1; |
1552 | const index_t dofId=i*nDOF0; | |
1553 | m_nodeId[nodeIdx] | |
1554 | = m_nodeDistribution[m_mpiInfo->rank+1]+dofId; | |
1555 | } | |
1556 | } | |
1557 | if (m_faceCount[2]==0) { // bottom row | |
1558 | #pragma omp for nowait | |
1559 | for (dim_t i=0; i<nDOF0; i++) { | |
1560 | const index_t nodeIdx=i+left; | |
1561 | const index_t dofId=nDOF0*(nDOF1-1)+i; | |
1562 | m_nodeId[nodeIdx] | |
1563 | = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId; | |
1564 | } | |
1565 | } | |
1566 | if (m_faceCount[3]==0) { // top row | |
1567 | #pragma omp for nowait | |
1568 | for (dim_t i=0; i<nDOF0; i++) { | |
1569 | const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left; | |
1570 | const index_t dofId=i; | |
1571 | m_nodeId[nodeIdx] | |
1572 | = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId; | |
1573 | } | |
1574 | } | |
1575 | ||
1576 | // elements | // populate element id's |
1577 | #pragma omp for nowait | #pragma omp for nowait |
1578 | for (dim_t i1=0; i1<m_NE1; i1++) { | for (dim_t i1=0; i1<m_NE[1]; i1++) { |
1579 | for (dim_t i0=0; i0<m_NE0; i0++) { | for (dim_t i0=0; i0<m_NE[0]; i0++) { |
1580 | m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; | m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0; |
1581 | } | } |
1582 | } | } |
1583 | ||
# | Line 1083 void Rectangle::populateSampleIds() | Line 1594 void Rectangle::populateSampleIds() |
1594 | updateTagsInUse(Elements); | updateTagsInUse(Elements); |
1595 | ||
1596 | // generate face offset vector and set face tags | // generate face offset vector and set face tags |
const IndexVector facesPerEdge = getNumFacesPerBoundary(); | ||
1597 | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; |
1598 | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; |
1599 | m_faceOffset.assign(facesPerEdge.size(), -1); | m_faceOffset.assign(4, -1); |
1600 | m_faceTags.clear(); | m_faceTags.clear(); |
1601 | index_t offset=0; | index_t offset=0; |
1602 | for (size_t i=0; i<facesPerEdge.size(); i++) { | for (size_t i=0; i<4; i++) { |
1603 | if (facesPerEdge[i]>0) { | if (m_faceCount[i]>0) { |
1604 | m_faceOffset[i]=offset; | m_faceOffset[i]=offset; |
1605 | offset+=facesPerEdge[i]; | offset+=m_faceCount[i]; |
1606 | m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); | m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]); |
1607 | } | } |
1608 | } | } |
1609 | setTagMap("left", LEFT); | setTagMap("left", LEFT); |
# | Line 1106 void Rectangle::populateSampleIds() | Line 1616 void Rectangle::populateSampleIds() |
1616 | //private | //private |
1617 | void Rectangle::createPattern() | void Rectangle::createPattern() |
1618 | { | { |
1619 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1620 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1621 | const index_t left = (m_offset0==0 ? 0 : 1); | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1622 | const index_t bottom = (m_offset1==0 ? 0 : 1); | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
1623 | ||
1624 | // populate node->DOF mapping with own degrees of freedom. | // populate node->DOF mapping with own degrees of freedom. |
1625 | // The rest is assigned in the loop further down | // The rest is assigned in the loop further down |
# | Line 1117 void Rectangle::createPattern() | Line 1627 void Rectangle::createPattern() |
1627 | #pragma omp parallel for | #pragma omp parallel for |
1628 | for (index_t i=bottom; i<bottom+nDOF1; i++) { | for (index_t i=bottom; i<bottom+nDOF1; i++) { |
1629 | for (index_t j=left; j<left+nDOF0; j++) { | for (index_t j=left; j<left+nDOF0; j++) { |
1630 | m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; | m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left; |
1631 | } | } |
1632 | } | } |
1633 | ||
# | Line 1129 void Rectangle::createPattern() | Line 1639 void Rectangle::createPattern() |
1639 | RankVector neighbour; | RankVector neighbour; |
1640 | IndexVector offsetInShared(1,0); | IndexVector offsetInShared(1,0); |
1641 | IndexVector sendShared, recvShared; | IndexVector sendShared, recvShared; |
1642 | int numShared=0; | int numShared=0, expectedShared=0; |
1643 | const int x=m_mpiInfo->rank%m_NX; | const int x=m_mpiInfo->rank%m_NX[0]; |
1644 | const int y=m_mpiInfo->rank/m_NX; | const int y=m_mpiInfo->rank/m_NX[0]; |
1645 | if (x > 0) | |
1646 | expectedShared += nDOF1; | |
1647 | if (x < m_NX[0] - 1) | |
1648 | expectedShared += nDOF1; | |
1649 | if (y > 0) | |
1650 | expectedShared += nDOF0; | |
1651 | if (y < m_NX[1] - 1) | |
1652 | expectedShared += nDOF0; | |
1653 | if (x > 0 && y > 0) expectedShared++; | |
1654 | if (x > 0 && y < m_NX[1] - 1) expectedShared++; | |
1655 | if (x < m_NX[0] - 1 && y > 0) expectedShared++; | |
1656 | if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++; | |
1657 | ||
1658 | vector<IndexVector> rowIndices(expectedShared); | |
1659 | ||
1660 | for (int i1=-1; i1<2; i1++) { | for (int i1=-1; i1<2; i1++) { |
1661 | for (int i0=-1; i0<2; i0++) { | for (int i0=-1; i0<2; i0++) { |
1662 | // skip this rank | // skip this rank |
# | Line 1140 void Rectangle::createPattern() | Line 1665 void Rectangle::createPattern() |
1665 | // location of neighbour rank | // location of neighbour rank |
1666 | const int nx=x+i0; | const int nx=x+i0; |
1667 | const int ny=y+i1; | const int ny=y+i1; |
1668 | if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { | if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) { |
1669 | neighbour.push_back(ny*m_NX+nx); | neighbour.push_back(ny*m_NX[0]+nx); |
1670 | if (i0==0) { | if (i0==0) { |
1671 | // sharing top or bottom edge | // sharing top or bottom edge |
1672 | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); |
1673 | const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); | const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left); |
1674 | offsetInShared.push_back(offsetInShared.back()+nDOF0); | offsetInShared.push_back(offsetInShared.back()+nDOF0); |
1675 | for (dim_t i=0; i<nDOF0; i++, numShared++) { | for (dim_t i=0; i<nDOF0; i++, numShared++) { |
1676 | sendShared.push_back(firstDOF+i); | sendShared.push_back(firstDOF+i); |
1677 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1678 | if (i>0) | if (i>0) |
1679 | colIndices[firstDOF+i-1].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared); |
1680 | colIndices[firstDOF+i].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i, numShared); |
1681 | if (i<nDOF0-1) | if (i<nDOF0-1) |
1682 | colIndices[firstDOF+i+1].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared); |
1683 | m_dofMap[firstNode+i]=numDOF+numShared; | m_dofMap[firstNode+i]=numDOF+numShared; |
1684 | } | } |
1685 | } else if (i1==0) { | } else if (i1==0) { |
1686 | // sharing left or right edge | // sharing left or right edge |
1687 | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); |
1688 | const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); | const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1); |
1689 | offsetInShared.push_back(offsetInShared.back()+nDOF1); | offsetInShared.push_back(offsetInShared.back()+nDOF1); |
1690 | for (dim_t i=0; i<nDOF1; i++, numShared++) { | for (dim_t i=0; i<nDOF1; i++, numShared++) { |
1691 | sendShared.push_back(firstDOF+i*nDOF0); | sendShared.push_back(firstDOF+i*nDOF0); |
1692 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1693 | if (i>0) | if (i>0) |
1694 | colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared); |
1695 | colIndices[firstDOF+i*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared); |
1696 | if (i<nDOF1-1) | if (i<nDOF1-1) |
1697 | colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared); |
1698 | m_dofMap[firstNode+i*m_N0]=numDOF+numShared; | m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared; |
1699 | } | } |
1700 | } else { | } else { |
1701 | // sharing a node | // sharing a node |
1702 | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); |
1703 | const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); | const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1); |
1704 | offsetInShared.push_back(offsetInShared.back()+1); | offsetInShared.push_back(offsetInShared.back()+1); |
1705 | sendShared.push_back(dof); | sendShared.push_back(dof); |
1706 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1707 | colIndices[dof].push_back(numShared); | doublyLink(colIndices, rowIndices, dof, numShared); |
1708 | m_dofMap[node]=numDOF+numShared; | m_dofMap[node]=numDOF+numShared; |
1709 | ++numShared; | ++numShared; |
1710 | } | } |
1711 | } | } |
1712 | } | } |
1713 | } | } |
1714 | ||
1715 | #pragma omp parallel for | |
1716 | for (int i = 0; i < numShared; i++) { | |
1717 | std::sort(rowIndices[i].begin(), rowIndices[i].end()); | |
1718 | } | |
1719 | ||
1720 | // create connector | // create connector |
1721 | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
1722 | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
# | Line 1201 void Rectangle::createPattern() | Line 1731 void Rectangle::createPattern() |
1731 | // create main and couple blocks | // create main and couple blocks |
1732 | Paso_Pattern *mainPattern = createMainPattern(); | Paso_Pattern *mainPattern = createMainPattern(); |
1733 | Paso_Pattern *colPattern, *rowPattern; | Paso_Pattern *colPattern, *rowPattern; |
1734 | createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); | createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern); |
1735 | ||
1736 | // allocate paso distribution | // allocate paso distribution |
1737 | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
# | Line 1285 void Rectangle::addToMatrixAndRHS(Paso_S | Line 1815 void Rectangle::addToMatrixAndRHS(Paso_S |
1815 | IndexVector rowIndex; | IndexVector rowIndex; |
1816 | rowIndex.push_back(m_dofMap[firstNode]); | rowIndex.push_back(m_dofMap[firstNode]); |
1817 | rowIndex.push_back(m_dofMap[firstNode+1]); | rowIndex.push_back(m_dofMap[firstNode+1]); |
1818 | rowIndex.push_back(m_dofMap[firstNode+m_N0]); | rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]); |
1819 | rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); | rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]); |
1820 | if (addF) { | if (addF) { |
1821 | double *F_p=F.getSampleDataRW(0); | double *F_p=F.getSampleDataRW(0); |
1822 | for (index_t i=0; i<rowIndex.size(); i++) { | for (index_t i=0; i<rowIndex.size(); i++) { |
# | Line 1304 void Rectangle::addToMatrixAndRHS(Paso_S | Line 1834 void Rectangle::addToMatrixAndRHS(Paso_S |
1834 | ||
1835 | //protected | //protected |
1836 | void Rectangle::interpolateNodesOnElements(escript::Data& out, | void Rectangle::interpolateNodesOnElements(escript::Data& out, |
1837 | escript::Data& in, bool reduced) const | const escript::Data& in, |
1838 | bool reduced) const | |
1839 | { | { |
1840 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1841 | if (reduced) { | if (reduced) { |
1842 | out.requireWrite(); | out.requireWrite(); |
1843 | const double c0 = .25; | const double c0 = 0.25; |
1844 | #pragma omp parallel for | #pragma omp parallel |
1845 | for (index_t k1=0; k1 < m_NE1; ++k1) { | { |
1846 | for (index_t k0=0; k0 < m_NE0; ++k0) { | vector<double> f_00(numComp); |
1847 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | vector<double> f_01(numComp); |
1848 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | vector<double> f_10(numComp); |
1849 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | vector<double> f_11(numComp); |
1850 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | #pragma omp for |
1851 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1852 | for (index_t i=0; i < numComp; ++i) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1853 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1854 | } /* end of component loop i */ | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1855 | } /* end of k0 loop */ | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1856 | } /* end of k1 loop */ | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1857 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); | |
1858 | for (index_t i=0; i < numComp; ++i) { | |
1859 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); | |
1860 | } /* end of component loop i */ | |
1861 | } /* end of k0 loop */ | |
1862 | } /* end of k1 loop */ | |
1863 | } /* end of parallel section */ | |
1864 | } else { | } else { |
1865 | out.requireWrite(); | out.requireWrite(); |
1866 | const double c0 = .16666666666666666667; | const double c0 = 0.16666666666666666667; |
1867 | const double c1 = .044658198738520451079; | const double c1 = 0.044658198738520451079; |
1868 | const double c2 = .62200846792814621559; | const double c2 = 0.62200846792814621559; |
1869 | #pragma omp parallel for | #pragma omp parallel |
1870 | for (index_t k1=0; k1 < m_NE1; ++k1) { | { |
1871 | for (index_t k0=0; k0 < m_NE0; ++k0) { | vector<double> f_00(numComp); |
1872 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | vector<double> f_01(numComp); |
1873 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | vector<double> f_10(numComp); |
1874 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | vector<double> f_11(numComp); |
1875 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | #pragma omp for |
1876 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1877 | for (index_t i=0; i < numComp; ++i) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1878 | o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1879 | o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1880 | o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1881 | o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1882 | } /* end of component loop i */ | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1883 | } /* end of k0 loop */ | for (index_t i=0; i < numComp; ++i) { |
1884 | } /* end of k1 loop */ | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i]; |
1885 | o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i]; | |
1886 | o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i]; | |
1887 | o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i]; | |
1888 | } /* end of component loop i */ | |
1889 | } /* end of k0 loop */ | |
1890 | } /* end of k1 loop */ | |
1891 | } /* end of parallel section */ | |
1892 | } | } |
1893 | } | } |
1894 | ||
1895 | //protected | //protected |
1896 | void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in, | void Rectangle::interpolateNodesOnFaces(escript::Data& out, |
1897 | const escript::Data& in, | |
1898 | bool reduced) const | bool reduced) const |
1899 | { | { |
1900 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1901 | if (reduced) { | if (reduced) { |
1902 | out.requireWrite(); | out.requireWrite(); |
const double c0 = .5; | ||
1903 | #pragma omp parallel | #pragma omp parallel |
1904 | { | { |
1905 | vector<double> f_00(numComp); | |
1906 | vector<double> f_01(numComp); | |
1907 | vector<double> f_10(numComp); | |
1908 | vector<double> f_11(numComp); | |
1909 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1910 | #pragma omp for nowait | #pragma omp for nowait |
1911 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1912 | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1913 | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1914 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1915 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1916 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); | o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2; |
1917 | } /* end of component loop i */ | } /* end of component loop i */ |
1918 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1919 | } /* end of face 0 */ | } /* end of face 0 */ |
1920 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1921 | #pragma omp for nowait | #pragma omp for nowait |
1922 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1923 | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1924 | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1925 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1926 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1927 | o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2; |
1928 | } /* end of component loop i */ | } /* end of component loop i */ |
1929 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1930 | } /* end of face 1 */ | } /* end of face 1 */ |
1931 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1932 | #pragma omp for nowait | #pragma omp for nowait |
1933 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1934 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1935 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1936 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1937 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1938 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); | o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2; |
1939 | } /* end of component loop i */ | } /* end of component loop i */ |
1940 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1941 | } /* end of face 2 */ | } /* end of face 2 */ |
1942 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1943 | #pragma omp for nowait | #pragma omp for nowait |
1944 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1945 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1946 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1947 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1948 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1949 | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2; |
1950 | } /* end of component loop i */ | } /* end of component loop i */ |
1951 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1952 | } /* end of face 3 */ | } /* end of face 3 */ |
1953 | } // end of parallel section | } /* end of parallel section */ |
1954 | } else { | } else { |
1955 | out.requireWrite(); | out.requireWrite(); |
1956 | const double c0 = 0.21132486540518711775; | const double c0 = 0.21132486540518711775; |
1957 | const double c1 = 0.78867513459481288225; | const double c1 = 0.78867513459481288225; |
1958 | #pragma omp parallel | #pragma omp parallel |
1959 | { | { |
1960 | vector<double> f_00(numComp); | |
1961 | vector<double> f_01(numComp); | |
1962 | vector<double> f_10(numComp); | |
1963 | vector<double> f_11(numComp); | |
1964 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1965 | #pragma omp for nowait | #pragma omp for nowait |
1966 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1967 | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1968 | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1969 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1970 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1971 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0; | o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i]; |
1972 | o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1; | o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i]; |
1973 | } /* end of component loop i */ | } /* end of component loop i */ |
1974 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1975 | } /* end of face 0 */ | } /* end of face 0 */ |
1976 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1977 | #pragma omp for nowait | #pragma omp for nowait |
1978 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1979 | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1980 | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1981 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1982 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1983 | o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0; | o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i]; |
1984 | o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1; | o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i]; |
1985 | } /* end of component loop i */ | } /* end of component loop i */ |
1986 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1987 | } /* end of face 1 */ | } /* end of face 1 */ |
1988 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1989 | #pragma omp for nowait | #pragma omp for nowait |
1990 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1991 | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1992 | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1993 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1994 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1995 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0; | o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i]; |
1996 | o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1; | o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i]; |
1997 | } /* end of component loop i */ | } /* end of component loop i */ |
1998 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1999 | } /* end of face 2 */ | } /* end of face 2 */ |
2000 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
2001 | #pragma omp for nowait | #pragma omp for nowait |
2002 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
2003 | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
2004 | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
2005 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
2006 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
2007 | o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0; | o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i]; |
2008 | o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1; | o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i]; |
2009 | } /* end of component loop i */ | } /* end of component loop i */ |
2010 | } /* end of k0 loop */ | } /* end of k0 loop */ |
2011 | } /* end of face 3 */ | } /* end of face 3 */ |
2012 | } // end of parallel section | } /* end of parallel section */ |
2013 | } | } |
2014 | } | } |
2015 | ||
//protected | ||
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
const double w0 = -0.1555021169820365539*h1/h0; | ||
const double w1 = 0.041666666666666666667; | ||
const double w2 = -0.15550211698203655390; | ||
const double w3 = 0.041666666666666666667*h0/h1; | ||
const double w4 = 0.15550211698203655390; | ||
const double w5 = -0.041666666666666666667; | ||
const double w6 = -0.01116454968463011277*h1/h0; | ||
const double w7 = 0.011164549684630112770; | ||
const double w8 = -0.011164549684630112770; | ||
const double w9 = -0.041666666666666666667*h1/h0; | ||
const double w10 = -0.041666666666666666667*h0/h1; | ||
const double w11 = 0.1555021169820365539*h1/h0; | ||
const double w12 = 0.1555021169820365539*h0/h1; | ||
const double w13 = 0.01116454968463011277*h0/h1; | ||
const double w14 = 0.01116454968463011277*h1/h0; | ||
const double w15 = 0.041666666666666666667*h1/h0; | ||
const double w16 = -0.01116454968463011277*h0/h1; | ||
const double w17 = -0.1555021169820365539*h0/h1; | ||
const double w18 = -0.33333333333333333333*h1/h0; | ||
const double w19 = 0.25; | ||
const double w20 = -0.25; | ||
const double w21 = 0.16666666666666666667*h0/h1; | ||
const double w22 = -0.16666666666666666667*h1/h0; | ||
const double w23 = -0.16666666666666666667*h0/h1; | ||
const double w24 = 0.33333333333333333333*h1/h0; | ||
const double w25 = 0.33333333333333333333*h0/h1; | ||
const double w26 = 0.16666666666666666667*h1/h0; | ||
const double w27 = -0.33333333333333333333*h0/h1; | ||
const double w28 = -0.032861463941450536761*h1; | ||
const double w29 = -0.032861463941450536761*h0; | ||
const double w30 = -0.12264065304058601714*h1; | ||
const double w31 = -0.0023593469594139828636*h1; | ||
const double w32 = -0.008805202725216129906*h0; | ||
const double w33 = -0.008805202725216129906*h1; | ||
const double w34 = 0.032861463941450536761*h1; | ||
const double w35 = 0.008805202725216129906*h1; | ||
const double w36 = 0.008805202725216129906*h0; | ||
const double w37 = 0.0023593469594139828636*h1; | ||
const double w38 = 0.12264065304058601714*h1; | ||
const double w39 = 0.032861463941450536761*h0; | ||
const double w40 = -0.12264065304058601714*h0; | ||
const double w41 = -0.0023593469594139828636*h0; | ||
const double w42 = 0.0023593469594139828636*h0; | ||
const double w43 = 0.12264065304058601714*h0; | ||
const double w44 = -0.16666666666666666667*h1; | ||
const double w45 = -0.083333333333333333333*h0; | ||
const double w46 = 0.083333333333333333333*h1; | ||
const double w47 = 0.16666666666666666667*h1; | ||
const double w48 = 0.083333333333333333333*h0; | ||
const double w49 = -0.16666666666666666667*h0; | ||
const double w50 = 0.16666666666666666667*h0; | ||
const double w51 = -0.083333333333333333333*h1; | ||
const double w52 = 0.025917019497006092316*h0*h1; | ||
const double w53 = 0.0018607582807716854616*h0*h1; | ||
const double w54 = 0.0069444444444444444444*h0*h1; | ||
const double w55 = 0.09672363354357992482*h0*h1; | ||
const double w56 = 0.00049858867864229740201*h0*h1; | ||
const double w57 = 0.055555555555555555556*h0*h1; | ||
const double w58 = 0.027777777777777777778*h0*h1; | ||
const double w59 = 0.11111111111111111111*h0*h1; | ||
const double w60 = -0.19716878364870322056*h1; | ||
const double w61 = -0.19716878364870322056*h0; | ||
const double w62 = -0.052831216351296779436*h0; | ||
const double w63 = -0.052831216351296779436*h1; | ||
const double w64 = 0.19716878364870322056*h1; | ||
const double w65 = 0.052831216351296779436*h1; | ||
const double w66 = 0.19716878364870322056*h0; | ||
const double w67 = 0.052831216351296779436*h0; | ||
const double w68 = -0.5*h1; | ||
const double w69 = -0.5*h0; | ||
const double w70 = 0.5*h1; | ||
const double w71 = 0.5*h0; | ||
const double w72 = 0.1555021169820365539*h0*h1; | ||
const double w73 = 0.041666666666666666667*h0*h1; | ||
const double w74 = 0.01116454968463011277*h0*h1; | ||
const double w75 = 0.25*h0*h1; | ||
2016 | ||
rhs.requireWrite(); | ||
#pragma omp parallel | ||
{ | ||
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | ||
#pragma omp for | ||
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | ||
for (index_t k0=0; k0<m_NE0; ++k0) { | ||
bool add_EM_S=false; | ||
bool add_EM_F=false; | ||
vector<double> EM_S(4*4, 0); | ||
vector<double> EM_F(4, 0); | ||
const index_t e = k0 + m_NE0*k1; | ||
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
if (A.actsExpanded()) { | ||
const double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; | ||
const double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; | ||
const double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; | ||
const double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; | ||
const double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; | ||
const double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; | ||
const double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; | ||
const double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; | ||
const double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; | ||
const double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; | ||
const double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; | ||
const double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; | ||
const double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; | ||
const double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; | ||
const double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; | ||
const double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; | ||
const double tmp0_0 = A_01_0 + A_01_3; | ||
const double tmp1_0 = A_00_0 + A_00_1; | ||
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | ||
const double tmp3_0 = A_00_2 + A_00_3; | ||
const double tmp4_0 = A_10_1 + A_10_2; | ||
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | ||
const double tmp6_0 = A_01_3 + A_10_0; | ||
const double tmp7_0 = A_01_0 + A_10_3; | ||
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | ||
const double tmp9_0 = A_01_0 + A_10_0; | ||
const double tmp12_0 = A_11_0 + A_11_2; | ||
const double tmp10_0 = A_01_3 + A_10_3; | ||
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | ||
const double tmp13_0 = A_01_2 + A_10_1; | ||
const double tmp11_0 = A_11_1 + A_11_3; | ||
const double tmp18_0 = A_01_1 + A_10_1; | ||
const double tmp15_0 = A_01_1 + A_10_2; | ||
const double tmp16_0 = A_10_0 + A_10_3; | ||
const double tmp17_0 = A_01_1 + A_01_2; | ||
const double tmp19_0 = A_01_2 + A_10_2; | ||
const double tmp0_1 = A_10_3*w8; | ||
const double tmp1_1 = tmp0_0*w1; | ||
const double tmp2_1 = A_01_1*w4; | ||
const double tmp3_1 = tmp1_0*w0; | ||
const double tmp4_1 = A_01_2*w7; | ||
const double tmp5_1 = tmp2_0*w3; | ||
const double tmp6_1 = tmp3_0*w6; | ||
const double tmp7_1 = A_10_0*w2; | ||
const double tmp8_1 = tmp4_0*w5; | ||
const double tmp9_1 = tmp2_0*w10; | ||
const double tmp14_1 = A_10_0*w8; | ||
const double tmp23_1 = tmp3_0*w14; | ||
const double tmp35_1 = A_01_0*w8; | ||
const double tmp54_1 = tmp13_0*w8; | ||
const double tmp20_1 = tmp9_0*w4; | ||
const double tmp25_1 = tmp12_0*w12; | ||
const double tmp44_1 = tmp7_0*w7; | ||
const double tmp26_1 = tmp10_0*w4; | ||
const double tmp52_1 = tmp18_0*w8; | ||
const double tmp48_1 = A_10_1*w7; | ||
const double tmp46_1 = A_01_3*w8; | ||
const double tmp50_1 = A_01_0*w2; | ||
const double tmp56_1 = tmp19_0*w8; | ||
const double tmp19_1 = A_10_3*w2; | ||
const double tmp47_1 = A_10_2*w4; | ||
const double tmp16_1 = tmp3_0*w0; | ||
const double tmp18_1 = tmp1_0*w6; | ||
const double tmp31_1 = tmp11_0*w12; | ||
const double tmp55_1 = tmp15_0*w2; | ||
const double tmp39_1 = A_10_2*w7; | ||
const double tmp11_1 = tmp6_0*w7; | ||
const double tmp40_1 = tmp11_0*w17; | ||
const double tmp34_1 = tmp15_0*w8; | ||
const double tmp33_1 = tmp14_0*w5; | ||
const double tmp24_1 = tmp11_0*w13; | ||
const double tmp43_1 = tmp17_0*w5; | ||
const double tmp15_1 = A_01_2*w4; | ||
const double tmp53_1 = tmp19_0*w2; | ||
const double tmp27_1 = tmp3_0*w11; | ||
const double tmp32_1 = tmp13_0*w2; | ||
const double tmp10_1 = tmp5_0*w9; | ||
const double tmp37_1 = A_10_1*w4; | ||
const double tmp38_1 = tmp5_0*w15; | ||
const double tmp17_1 = A_01_1*w7; | ||
const double tmp12_1 = tmp7_0*w4; | ||
const double tmp22_1 = tmp10_0*w7; | ||
const double tmp57_1 = tmp18_0*w2; | ||
const double tmp28_1 = tmp9_0*w7; | ||
const double tmp29_1 = tmp1_0*w14; | ||
const double tmp51_1 = tmp11_0*w16; | ||
const double tmp42_1 = tmp12_0*w16; | ||
const double tmp49_1 = tmp12_0*w17; | ||
const double tmp21_1 = tmp1_0*w11; | ||
const double tmp45_1 = tmp6_0*w4; | ||
const double tmp13_1 = tmp8_0*w1; | ||
const double tmp36_1 = tmp16_0*w1; | ||
const double tmp41_1 = A_01_3*w2; | ||
const double tmp30_1 = tmp12_0*w13; | ||
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double A_00 = A_p[INDEX2(0,0,2)]; | ||
const double A_10 = A_p[INDEX2(1,0,2)]; | ||
const double A_01 = A_p[INDEX2(0,1,2)]; | ||
const double A_11 = A_p[INDEX2(1,1,2)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_00*w18; | ||
const double tmp1_1 = A_01*w19; | ||
const double tmp2_1 = A_10*w20; | ||
const double tmp3_1 = A_11*w21; | ||
const double tmp4_1 = A_00*w22; | ||
const double tmp5_1 = tmp0_0*w19; | ||
const double tmp6_1 = A_11*w23; | ||
const double tmp7_1 = A_11*w25; | ||
const double tmp8_1 = A_00*w24; | ||
const double tmp9_1 = tmp0_0*w20; | ||
const double tmp10_1 = A_01*w20; | ||
const double tmp11_1 = A_11*w27; | ||
const double tmp12_1 = A_00*w26; | ||
const double tmp13_1 = A_10*w19; | ||
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
} | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
if (B.actsExpanded()) { | ||
const double B_0_0 = B_p[INDEX2(0,0,2)]; | ||
const double B_1_0 = B_p[INDEX2(1,0,2)]; | ||
const double B_0_1 = B_p[INDEX2(0,1,2)]; | ||
const double B_1_1 = B_p[INDEX2(1,1,2)]; | ||
const double B_0_2 = B_p[INDEX2(0,2,2)]; | ||
const double B_1_2 = B_p[INDEX2(1,2,2)]; | ||
const double B_0_3 = B_p[INDEX2(0,3,2)]; | ||
const double B_1_3 = B_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = B_1_0 + B_1_1; | ||
const double tmp1_0 = B_1_2 + B_1_3; | ||
const double tmp2_0 = B_0_1 + B_0_3; | ||
const double tmp3_0 = B_0_0 + B_0_2; | ||
const double tmp63_1 = B_1_1*w42; | ||
const double tmp79_1 = B_1_1*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp8_1 = tmp0_0*w32; | ||
const double tmp71_1 = B_0_1*w34; | ||
const double tmp19_1 = B_0_3*w31; | ||
const double tmp15_1 = B_0_3*w34; | ||
const double tmp9_1 = tmp3_0*w34; | ||
const double tmp35_1 = B_1_0*w36; | ||
const double tmp66_1 = B_0_3*w28; | ||
const double tmp28_1 = B_1_0*w42; | ||
const double tmp22_1 = B_1_0*w40; | ||
const double tmp16_1 = B_1_2*w29; | ||
const double tmp6_1 = tmp2_0*w35; | ||
const double tmp55_1 = B_1_3*w40; | ||
const double tmp50_1 = B_1_3*w42; | ||
const double tmp7_1 = tmp1_0*w29; | ||
const double tmp1_1 = tmp1_0*w32; | ||
const double tmp57_1 = B_0_3*w30; | ||
const double tmp18_1 = B_1_1*w32; | ||
const double tmp53_1 = B_1_0*w41; | ||
const double tmp61_1 = B_1_3*w36; | ||
const double tmp27_1 = B_0_3*w38; | ||
const double tmp64_1 = B_0_2*w30; | ||
const double tmp76_1 = B_0_1*w38; | ||
const double tmp39_1 = tmp2_0*w34; | ||
const double tmp62_1 = B_0_1*w31; | ||
const double tmp56_1 = B_0_0*w31; | ||
const double tmp49_1 = B_1_1*w36; | ||
const double tmp2_1 = B_0_2*w31; | ||
const double tmp23_1 = B_0_2*w33; | ||
const double tmp38_1 = B_1_1*w43; | ||
const double tmp74_1 = B_1_2*w41; | ||
const double tmp43_1 = B_1_1*w41; | ||
const double tmp58_1 = B_0_2*w28; | ||
const double tmp67_1 = B_0_0*w33; | ||
const double tmp33_1 = tmp0_0*w39; | ||
const double tmp4_1 = B_0_0*w28; | ||
const double tmp20_1 = B_0_0*w30; | ||
const double tmp13_1 = B_0_2*w38; | ||
const double tmp65_1 = B_1_2*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp41_1 = tmp3_0*w33; | ||
const double tmp73_1 = B_0_2*w37; | ||
const double tmp69_1 = B_0_0*w38; | ||
const double tmp48_1 = B_1_2*w39; | ||
const double tmp59_1 = B_0_1*w33; | ||
const double tmp17_1 = B_1_3*w41; | ||
const double tmp5_1 = B_0_3*w33; | ||
const double tmp3_1 = B_0_1*w30; | ||
const double tmp21_1 = B_0_1*w28; | ||
const double tmp42_1 = B_1_0*w29; | ||
const double tmp54_1 = B_1_2*w32; | ||
const double tmp60_1 = B_1_0*w39; | ||
const double tmp32_1 = tmp1_0*w36; | ||
const double tmp10_1 = B_0_1*w37; | ||
const double tmp14_1 = B_0_0*w35; | ||
const double tmp29_1 = B_0_1*w35; | ||
const double tmp26_1 = B_1_2*w36; | ||
const double tmp30_1 = B_1_3*w43; | ||
const double tmp70_1 = B_0_2*w35; | ||
const double tmp34_1 = B_1_3*w39; | ||
const double tmp51_1 = B_1_0*w43; | ||
const double tmp31_1 = B_0_2*w34; | ||
const double tmp45_1 = tmp3_0*w28; | ||
const double tmp11_1 = tmp1_0*w39; | ||
const double tmp52_1 = B_1_1*w29; | ||
const double tmp44_1 = B_1_3*w32; | ||
const double tmp25_1 = B_1_1*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp72_1 = B_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w28; | ||
const double tmp46_1 = B_1_2*w40; | ||
const double tmp36_1 = B_1_2*w42; | ||
const double tmp24_1 = B_0_0*w37; | ||
const double tmp77_1 = B_0_3*w35; | ||
const double tmp68_1 = B_0_3*w37; | ||
const double tmp78_1 = B_0_0*w34; | ||
const double tmp12_1 = tmp0_0*w36; | ||
const double tmp75_1 = B_1_0*w32; | ||
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double B_0 = B_p[0]; | ||
const double B_1 = B_p[1]; | ||
const double tmp0_1 = B_0*w44; | ||
const double tmp1_1 = B_1*w45; | ||
const double tmp2_1 = B_0*w46; | ||
const double tmp3_1 = B_0*w47; | ||
const double tmp4_1 = B_1*w48; | ||
const double tmp5_1 = B_1*w49; | ||
const double tmp6_1 = B_1*w50; | ||
const double tmp7_1 = B_0*w51; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; | ||
} | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
if (C.actsExpanded()) { | ||
const double C_0_0 = C_p[INDEX2(0,0,2)]; | ||
const double C_1_0 = C_p[INDEX2(1,0,2)]; | ||
const double C_0_1 = C_p[INDEX2(0,1,2)]; | ||
const double C_1_1 = C_p[INDEX2(1,1,2)]; | ||
const double C_0_2 = C_p[INDEX2(0,2,2)]; | ||
const double C_1_2 = C_p[INDEX2(1,2,2)]; | ||
const double C_0_3 = C_p[INDEX2(0,3,2)]; | ||
const double C_1_3 = C_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = C_1_0 + C_1_1; | ||
const double tmp1_0 = C_1_2 + C_1_3; | ||
const double tmp2_0 = C_0_1 + C_0_3; | ||
const double tmp3_0 = C_0_0 + C_0_2; | ||
const double tmp64_1 = C_0_2*w30; | ||
const double tmp14_1 = C_0_2*w28; | ||
const double tmp19_1 = C_0_3*w31; | ||
const double tmp22_1 = C_1_0*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp29_1 = C_0_1*w35; | ||
const double tmp73_1 = C_0_2*w37; | ||
const double tmp74_1 = C_1_2*w41; | ||
const double tmp52_1 = C_1_3*w39; | ||
const double tmp25_1 = C_1_1*w39; | ||
const double tmp62_1 = C_0_1*w31; | ||
const double tmp79_1 = C_1_1*w40; | ||
const double tmp43_1 = C_1_1*w36; | ||
const double tmp27_1 = C_0_3*w38; | ||
const double tmp28_1 = C_1_0*w42; | ||
const double tmp63_1 = C_1_1*w42; | ||
const double tmp59_1 = C_0_3*w34; | ||
const double tmp72_1 = C_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w35; | ||
const double tmp13_1 = C_0_3*w30; | ||
const double tmp51_1 = C_1_2*w40; | ||
const double tmp54_1 = C_1_2*w42; | ||
const double tmp12_1 = C_0_0*w31; | ||
const double tmp2_1 = tmp1_0*w32; | ||
const double tmp68_1 = C_0_2*w31; | ||
const double tmp75_1 = C_1_0*w32; | ||
const double tmp49_1 = C_1_1*w41; | ||
const double tmp4_1 = C_0_2*w35; | ||
const double tmp66_1 = C_0_3*w28; | ||
const double tmp56_1 = C_0_1*w37; | ||
const double tmp5_1 = C_0_1*w34; | ||
const double tmp38_1 = tmp2_0*w34; | ||
const double tmp76_1 = C_0_1*w38; | ||
const double tmp21_1 = C_0_1*w28; | ||
const double tmp69_1 = C_0_1*w30; | ||
const double tmp53_1 = C_1_0*w36; | ||
const double tmp42_1 = C_1_2*w39; | ||
const double tmp32_1 = tmp1_0*w29; | ||
const double tmp45_1 = C_1_0*w43; | ||
const double tmp33_1 = tmp0_0*w32; | ||
const double tmp35_1 = C_1_0*w41; | ||
const double tmp26_1 = C_1_2*w36; | ||
const double tmp67_1 = C_0_0*w33; | ||
const double tmp31_1 = C_0_2*w34; | ||
const double tmp20_1 = C_0_0*w30; | ||
const double tmp70_1 = C_0_0*w28; | ||
const double tmp8_1 = tmp0_0*w39; | ||
const double tmp30_1 = C_1_3*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp17_1 = C_1_3*w41; | ||
const double tmp58_1 = C_0_0*w35; | ||
const double tmp9_1 = tmp3_0*w33; | ||
const double tmp61_1 = C_1_3*w36; | ||
const double tmp41_1 = tmp3_0*w34; | ||
const double tmp50_1 = C_1_3*w32; | ||
const double tmp18_1 = C_1_1*w32; | ||
const double tmp6_1 = tmp1_0*w36; | ||
const double tmp3_1 = C_0_0*w38; | ||
const double tmp34_1 = C_1_1*w29; | ||
const double tmp77_1 = C_0_3*w35; | ||
const double tmp65_1 = C_1_2*w43; | ||
const double tmp71_1 = C_0_3*w33; | ||
const double tmp55_1 = C_1_1*w43; | ||
const double tmp46_1 = tmp3_0*w28; | ||
const double tmp24_1 = C_0_0*w37; | ||
const double tmp10_1 = tmp1_0*w39; | ||
const double tmp48_1 = C_1_0*w29; | ||
const double tmp15_1 = C_0_1*w33; | ||
const double tmp36_1 = C_1_2*w32; | ||
const double tmp60_1 = C_1_0*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp16_1 = C_1_2*w29; | ||
const double tmp1_1 = C_0_3*w37; | ||
const double tmp7_1 = tmp2_0*w28; | ||
const double tmp39_1 = C_1_3*w40; | ||
const double tmp44_1 = C_1_3*w42; | ||
const double tmp57_1 = C_0_2*w38; | ||
const double tmp78_1 = C_0_0*w34; | ||
const double tmp11_1 = tmp0_0*w36; | ||
const double tmp23_1 = C_0_2*w33; | ||
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double C_0 = C_p[0]; | ||
const double C_1 = C_p[1]; | ||
const double tmp0_1 = C_0*w47; | ||
const double tmp1_1 = C_1*w45; | ||
const double tmp2_1 = C_1*w48; | ||
const double tmp3_1 = C_0*w51; | ||
const double tmp4_1 = C_0*w44; | ||
const double tmp5_1 = C_1*w49; | ||
const double tmp6_1 = C_1*w50; | ||
const double tmp7_1 = C_0*w46; | ||
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; | ||
} | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
if (D.actsExpanded()) { | ||
const double D_0 = D_p[0]; | ||
const double D_1 = D_p[1]; | ||
const double D_2 = D_p[2]; | ||
const double D_3 = D_p[3]; | ||
const double tmp0_0 = D_0 + D_1; | ||
const double tmp1_0 = D_2 + D_3; | ||
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; | ||
const double tmp3_0 = D_1 + D_2; | ||
const double tmp4_0 = D_1 + D_3; | ||
const double tmp5_0 = D_0 + D_2; | ||
const double tmp6_0 = D_0 + D_3; | ||
const double tmp0_1 = tmp0_0*w52; | ||
const double tmp1_1 = tmp1_0*w53; | ||
const double tmp2_1 = tmp2_0*w54; | ||
const double tmp3_1 = tmp1_0*w52; | ||
const double tmp4_1 = tmp0_0*w53; | ||
const double tmp5_1 = tmp3_0*w54; | ||
const double tmp6_1 = D_0*w55; | ||
const double tmp7_1 = D_3*w56; | ||
const double tmp8_1 = D_3*w55; | ||
const double tmp9_1 = D_0*w56; | ||
const double tmp10_1 = tmp4_0*w52; | ||
const double tmp11_1 = tmp5_0*w53; | ||
const double tmp12_1 = tmp5_0*w52; | ||
const double tmp13_1 = tmp4_0*w53; | ||
const double tmp14_1 = tmp6_0*w54; | ||
const double tmp15_1 = D_2*w55; | ||
const double tmp16_1 = D_1*w56; | ||
const double tmp17_1 = D_1*w55; | ||
const double tmp18_1 = D_2*w56; | ||
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp2_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp2_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | ||
} else { // constant data | ||
const double tmp0_1 = D_p[0]*w57; | ||
const double tmp1_1 = D_p[0]*w58; | ||
const double tmp2_1 = D_p[0]*w59; | ||
EM_S[INDEX2(0,0,4)]+=tmp2_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp2_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp2_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp2_1; | ||
} | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
if (X.actsExpanded()) { | ||
const double X_0_0 = X_p[INDEX2(0,0,2)]; | ||
const double X_1_0 = X_p[INDEX2(1,0,2)]; | ||
const double X_0_1 = X_p[INDEX2(0,1,2)]; | ||
const double X_1_1 = X_p[INDEX2(1,1,2)]; | ||
const double X_0_2 = X_p[INDEX2(0,2,2)]; | ||
const double X_1_2 = X_p[INDEX2(1,2,2)]; | ||
const double X_0_3 = X_p[INDEX2(0,3,2)]; | ||
const double X_1_3 = X_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = X_0_2 + X_0_3; | ||
const double tmp1_0 = X_1_1 + X_1_3; | ||
const double tmp2_0 = X_1_0 + X_1_2; | ||
const double tmp3_0 = X_0_0 + X_0_1; | ||
const double tmp0_1 = tmp0_0*w63; | ||
const double tmp1_1 = tmp1_0*w62; | ||
const double tmp2_1 = tmp2_0*w61; | ||
const double tmp3_1 = tmp3_0*w60; | ||
const double tmp4_1 = tmp0_0*w65; | ||
const double tmp5_1 = tmp3_0*w64; | ||
const double tmp6_1 = tmp2_0*w62; | ||
const double tmp7_1 = tmp1_0*w61; | ||
const double tmp8_1 = tmp2_0*w66; | ||
const double tmp9_1 = tmp3_0*w63; | ||
const double tmp10_1 = tmp0_0*w60; | ||
const double tmp11_1 = tmp1_0*w67; | ||
const double tmp12_1 = tmp1_0*w66; | ||
const double tmp13_1 = tmp3_0*w65; | ||
const double tmp14_1 = tmp0_0*w64; | ||
const double tmp15_1 = tmp2_0*w67; | ||
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; | ||
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
} else { // constant data | ||
const double tmp0_1 = X_p[1]*w69; | ||
const double tmp1_1 = X_p[0]*w68; | ||
const double tmp2_1 = X_p[0]*w70; | ||
const double tmp3_1 = X_p[1]*w71; | ||
EM_F[0]+=tmp0_1 + tmp1_1; | ||
EM_F[1]+=tmp0_1 + tmp2_1; | ||
EM_F[2]+=tmp1_1 + tmp3_1; | ||
EM_F[3]+=tmp2_1 + tmp3_1; | ||
} | ||
} | ||
/////////////// | ||
// process Y // | ||
/////////////// | ||
if (!Y.isEmpty()) { | ||
add_EM_F=true; | ||
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | ||
if (Y.actsExpanded()) { | ||
const double Y_0 = Y_p[0]; | ||
const double Y_1 = Y_p[1]; | ||
const double Y_2 = Y_p[2]; | ||
const double Y_3 = Y_p[3]; | ||
const double tmp0_0 = Y_1 + Y_2; | ||
const double tmp1_0 = Y_0 + Y_3; | ||
const double tmp0_1 = Y_0*w72; | ||
const double tmp1_1 = tmp0_0*w73; | ||
const double tmp2_1 = Y_3*w74; | ||
const double tmp3_1 = Y_1*w72; | ||
const double tmp4_1 = tmp1_0*w73; | ||
const double tmp5_1 = Y_2*w74; | ||
const double tmp6_1 = Y_2*w72; | ||
const double tmp7_1 = Y_1*w74; | ||
const double tmp8_1 = Y_3*w72; | ||
const double tmp9_1 = Y_0*w74; | ||
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; | ||
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; | ||
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; | ||
} else { // constant data | ||
const double tmp0_1 = Y_p[0]*w75; | ||
EM_F[0]+=tmp0_1; | ||
EM_F[1]+=tmp0_1; | ||
EM_F[2]+=tmp0_1; | ||
EM_F[3]+=tmp0_1; | ||
} | ||
} | ||
2017 | ||
2018 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | namespace |
2019 | const index_t firstNode=m_N0*k1+k0; | { |
2020 | addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); | // Calculates a guassian blur colvolution matrix for 2D |
2021 | } // end k0 loop | // See wiki article on the subject |
2022 | } // end k1 loop | double* get2DGauss(unsigned radius, double sigma) |
2023 | } // end of colouring | { |
2024 | } // end of parallel region | double* arr=new double[(radius*2+1)*(radius*2+1)]; |
2025 | double common=M_1_PI*0.5*1/(sigma*sigma); | |
2026 | double total=0; | |
2027 | int r=static_cast<int>(radius); | |
2028 | for (int y=-r;y<=r;++y) | |
2029 | { | |
2030 | for (int x=-r;x<=r;++x) | |
2031 | { | |
2032 | arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma)); | |
2033 | total+=arr[(x+r)+(y+r)*(r*2+1)]; | |
2034 | } | |
2035 | } | |
2036 | double invtotal=1/total; | |
2037 | for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p) | |
2038 | { | |
2039 | arr[p]*=invtotal; | |
2040 | } | |
2041 | return arr; | |
2042 | } | |
2043 | ||
2044 | // applies conv to source to get a point. | |
2045 | // (xp, yp) are the coords in the source matrix not the destination matrix | |
2046 | double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width) | |
2047 | { | |
2048 | size_t bx=xp-radius, by=yp-radius; | |
2049 | size_t sbase=bx+by*width; | |
2050 | double result=0; | |
2051 | for (int y=0;y<2*radius+1;++y) | |
2052 | { | |
2053 | for (int x=0;x<2*radius+1;++x) | |
2054 | { | |
2055 | result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width]; | |
2056 | } | |
2057 | } | |
2058 | return result; | |
2059 | } | |
2060 | } | } |
2061 | ||
//protected | ||
void Rectangle::assemblePDESingleReduced(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
const double w0 = -.25*h1/h0; | ||
const double w1 = .25; | ||
const double w2 = -.25; | ||
const double w3 = .25*h0/h1; | ||
const double w4 = -.25*h0/h1; | ||
const double w5 = .25*h1/h0; | ||
const double w6 = -.125*h1; | ||
const double w7 = -.125*h0; | ||
const double w8 = .125*h1; | ||
const double w9 = .125*h0; | ||
const double w10 = .0625*h0*h1; | ||
const double w11 = -.5*h1; | ||
const double w12 = -.5*h0; | ||
const double w13 = .5*h1; | ||
const double w14 = .5*h0; | ||
const double w15 = .25*h0*h1; | ||
2062 | ||
2063 | rhs.requireWrite(); | /* This is a wrapper for filtered (and non-filtered) randoms |
2064 | #pragma omp parallel | * For detailed doco see randomFillWorker |
2065 | */ | |
2066 | escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape, | |
2067 | const escript::FunctionSpace& what, | |
2068 | long seed, const boost::python::tuple& filter) const | |
2069 | { | |
2070 | int numvals=escript::DataTypes::noValues(shape); | |
2071 | if (len(filter)>0 && (numvals!=1)) | |
2072 | { | { |
2073 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | throw RipleyException("Ripley only supports filters for scalar data."); |
2074 | #pragma omp for | } |
2075 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | escript::Data res=randomFillWorker(shape, seed, filter); |
2076 | for (index_t k0=0; k0<m_NE0; ++k0) { | if (res.getFunctionSpace()!=what) |
2077 | bool add_EM_S=false; | { |
2078 | bool add_EM_F=false; | escript::Data r=escript::Data(res, what); |
2079 | vector<double> EM_S(4*4, 0); | return r; |
2080 | vector<double> EM_F(4, 0); | } |
2081 | const index_t e = k0 + m_NE0*k1; | return res; |
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
const double A_00 = A_p[INDEX2(0,0,2)]; | ||
const double A_10 = A_p[INDEX2(1,0,2)]; | ||
const double A_01 = A_p[INDEX2(0,1,2)]; | ||
const double A_11 = A_p[INDEX2(1,1,2)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_11*w3; | ||
const double tmp1_1 = A_00*w0; | ||
const double tmp2_1 = A_01*w1; | ||
const double tmp3_1 = A_10*w2; | ||
const double tmp4_1 = tmp0_0*w1; | ||
const double tmp5_1 = A_11*w4; | ||
const double tmp6_1 = A_00*w5; | ||
const double tmp7_1 = tmp0_0*w2; | ||
const double tmp8_1 = A_10*w1; | ||
const double tmp9_1 = A_01*w2; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
const double tmp2_1 = B_p[0]*w8; | ||
const double tmp0_1 = B_p[0]*w6; | ||
const double tmp3_1 = B_p[1]*w9; | ||
const double tmp1_1 = B_p[1]*w7; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp2_1 + tmp3_1; | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
const double tmp1_1 = C_p[1]*w7; | ||
const double tmp0_1 = C_p[0]*w8; | ||
const double tmp3_1 = C_p[0]*w6; | ||
const double tmp2_1 = C_p[1]*w9; | ||
EM_S[INDEX2(0,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp2_1; | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
const double tmp0_1 = D_p[0]*w10; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1; | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
const double tmp0_1 = X_p[0]*w11; | ||
const double tmp2_1 = X_p[0]*w13; | ||
const double tmp1_1 = X_p[1]*w12; | ||
const double tmp3_1 = X_p[1]*w14; | ||
EM_F[0]+=tmp0_1 + tmp1_1; | ||
EM_F[1]+=tmp1_1 + tmp2_1; | ||
EM_F[2]+=tmp0_1 + tmp3_1; | ||
EM_F[3]+=tmp2_1 + tmp3_1; | ||
} | ||
/////////////// | ||
// process Y // | ||
/////////////// | ||
if (!Y.isEmpty()) { | ||
add_EM_F=true; | ||
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | ||
const double tmp0_1 = Y_p[0]*w15; | ||
EM_F[0]+=tmp0_1; | ||
EM_F[1]+=tmp0_1; | ||
EM_F[2]+=tmp0_1; | ||
EM_F[3]+=tmp0_1; | ||
} | ||
// add to matrix (if add_EM_S) and RHS (if add_EM_F) | ||
const index_t firstNode=m_N0*k1+k0; | ||
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); | ||
} // end k0 loop | ||
} // end k1 loop | ||
} // end of colouring | ||
} // end of parallel region | ||
2082 | } | } |
2083 | ||
//protected | ||
void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
dim_t numEq, numComp; | ||
if (!mat) | ||
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); | ||
else { | ||
numEq=mat->logical_row_block_size; | ||
numComp=mat->logical_col_block_size; | ||
} | ||
const double w0 = -0.1555021169820365539*h1/h0; | ||
const double w1 = 0.041666666666666666667; | ||
const double w2 = -0.15550211698203655390; | ||
const double w3 = 0.041666666666666666667*h0/h1; | ||
const double w4 = 0.15550211698203655390; | ||
const double w5 = -0.041666666666666666667; | ||
const double w6 = -0.01116454968463011277*h1/h0; | ||
const double w7 = 0.011164549684630112770; | ||
const double w8 = -0.011164549684630112770; | ||
const double w9 = -0.041666666666666666667*h1/h0; | ||
const double w10 = -0.041666666666666666667*h0/h1; | ||
const double w11 = 0.1555021169820365539*h1/h0; | ||
const double w12 = 0.1555021169820365539*h0/h1; | ||
const double w13 = 0.01116454968463011277*h0/h1; | ||
const double w14 = 0.01116454968463011277*h1/h0; | ||
const double w15 = 0.041666666666666666667*h1/h0; | ||
const double w16 = -0.01116454968463011277*h0/h1; | ||
const double w17 = -0.1555021169820365539*h0/h1; | ||
const double w18 = -0.33333333333333333333*h1/h0; | ||
const double w19 = 0.25000000000000000000; | ||
const double w20 = -0.25000000000000000000; | ||
const double w21 = 0.16666666666666666667*h0/h1; | ||
const double w22 = -0.16666666666666666667*h1/h0; | ||
const double w23 = -0.16666666666666666667*h0/h1; | ||
const double w24 = 0.33333333333333333333*h1/h0; | ||
const double w25 = 0.33333333333333333333*h0/h1; | ||
const double w26 = 0.16666666666666666667*h1/h0; | ||
const double w27 = -0.33333333333333333333*h0/h1; | ||
const double w28 = -0.032861463941450536761*h1; | ||
const double w29 = -0.032861463941450536761*h0; | ||
const double w30 = -0.12264065304058601714*h1; | ||
const double w31 = -0.0023593469594139828636*h1; | ||
const double w32 = -0.008805202725216129906*h0; | ||
const double w33 = -0.008805202725216129906*h1; | ||
const double w34 = 0.032861463941450536761*h1; | ||
const double w35 = 0.008805202725216129906*h1; | ||
const double w36 = 0.008805202725216129906*h0; | ||
const double w37 = 0.0023593469594139828636*h1; | ||
const double w38 = 0.12264065304058601714*h1; | ||
const double w39 = 0.032861463941450536761*h0; | ||
const double w40 = -0.12264065304058601714*h0; | ||
const double w41 = -0.0023593469594139828636*h0; | ||
const double w42 = 0.0023593469594139828636*h0; | ||
const double w43 = 0.12264065304058601714*h0; | ||
const double w44 = -0.16666666666666666667*h1; | ||
const double w45 = -0.083333333333333333333*h0; | ||
const double w46 = 0.083333333333333333333*h1; | ||
const double w47 = 0.16666666666666666667*h1; | ||
const double w48 = 0.083333333333333333333*h0; | ||
const double w49 = -0.16666666666666666667*h0; | ||
const double w50 = 0.16666666666666666667*h0; | ||
const double w51 = -0.083333333333333333333*h1; | ||
const double w52 = 0.025917019497006092316*h0*h1; | ||
const double w53 = 0.0018607582807716854616*h0*h1; | ||
const double w54 = 0.0069444444444444444444*h0*h1; | ||
const double w55 = 0.09672363354357992482*h0*h1; | ||
const double w56 = 0.00049858867864229740201*h0*h1; | ||
const double w57 = 0.055555555555555555556*h0*h1; | ||
const double w58 = 0.027777777777777777778*h0*h1; | ||
const double w59 = 0.11111111111111111111*h0*h1; | ||
const double w60 = -0.19716878364870322056*h1; | ||
const double w61 = -0.19716878364870322056*h0; | ||
const double w62 = -0.052831216351296779436*h0; | ||
const double w63 = -0.052831216351296779436*h1; | ||
const double w64 = 0.19716878364870322056*h1; | ||
const double w65 = 0.052831216351296779436*h1; | ||
const double w66 = 0.19716878364870322056*h0; | ||
const double w67 = 0.052831216351296779436*h0; | ||
const double w68 = -0.5*h1; | ||
const double w69 = -0.5*h0; | ||
const double w70 = 0.5*h1; | ||
const double w71 = 0.5*h0; | ||
const double w72 = 0.1555021169820365539*h0*h1; | ||
const double w73 = 0.041666666666666666667*h0*h1; | ||
const double w74 = 0.01116454968463011277*h0*h1; | ||
const double w75 = 0.25*h0*h1; | ||
2084 | ||
2085 | rhs.requireWrite(); | /* This routine produces a Data object filled with smoothed random data. |
2086 | #pragma omp parallel | The dimensions of the rectangle being filled are internal[0] x internal[1] points. |
2087 | { | A parameter radius gives the size of the stencil used for the smoothing. |
2088 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | A point on the left hand edge for example, will still require `radius` extra points to the left |
2089 | #pragma omp for | in order to complete the stencil. |
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | ||
for (index_t k0=0; k0<m_NE0; ++k0) { | ||
bool add_EM_S=false; | ||
bool add_EM_F=false; | ||
vector<double> EM_S(4*4*numEq*numComp, 0); | ||
vector<double> EM_F(4*numEq, 0); | ||
const index_t e = k0 + m_NE0*k1; | ||
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
if (A.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; | ||
const double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; | ||
const double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; | ||
const double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; | ||
const double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; | ||
const double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; | ||
const double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; | ||
const double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; | ||
const double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; | ||
const double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; | ||
const double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; | ||
const double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; | ||
const double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; | ||
const double A_01_3 = A_p[INDEX5(k,0,m,1,3, numEq,2,numComp,2)]; | ||
const double A_10_3 = A_p[INDEX5(k,1,m,0,3, numEq,2,numComp,2)]; | ||
const double A_11_3 = A_p[INDEX5(k,1,m,1,3, numEq,2,numComp,2)]; | ||
const double tmp0_0 = A_01_0 + A_01_3; | ||
const double tmp1_0 = A_00_0 + A_00_1; | ||
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | ||
const double tmp3_0 = A_00_2 + A_00_3; | ||
const double tmp4_0 = A_10_1 + A_10_2; | ||
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | ||
const double tmp6_0 = A_01_3 + A_10_0; | ||
const double tmp7_0 = A_01_0 + A_10_3; | ||
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | ||
const double tmp9_0 = A_01_0 + A_10_0; | ||
const double tmp10_0 = A_01_3 + A_10_3; | ||
const double tmp11_0 = A_11_1 + A_11_3; | ||
const double tmp12_0 = A_11_0 + A_11_2; | ||
const double tmp13_0 = A_01_2 + A_10_1; | ||
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | ||
const double tmp15_0 = A_01_1 + A_10_2; | ||
const double tmp16_0 = A_10_0 + A_10_3; | ||
const double tmp17_0 = A_01_1 + A_01_2; | ||
const double tmp18_0 = A_01_1 + A_10_1; | ||
const double tmp19_0 = A_01_2 + A_10_2; | ||
const double tmp0_1 = A_10_3*w8; | ||
const double tmp1_1 = tmp0_0*w1; | ||
const double tmp2_1 = A_01_1*w4; | ||
const double tmp3_1 = tmp1_0*w0; | ||
const double tmp4_1 = A_01_2*w7; | ||
const double tmp5_1 = tmp2_0*w3; | ||
const double tmp6_1 = tmp3_0*w6; | ||
const double tmp7_1 = A_10_0*w2; | ||
const double tmp8_1 = tmp4_0*w5; | ||
const double tmp9_1 = tmp2_0*w10; | ||
const double tmp10_1 = tmp5_0*w9; | ||
const double tmp11_1 = tmp6_0*w7; | ||
const double tmp12_1 = tmp7_0*w4; | ||
const double tmp13_1 = tmp8_0*w1; | ||
const double tmp14_1 = A_10_0*w8; | ||
const double tmp15_1 = A_01_2*w4; | ||
const double tmp16_1 = tmp3_0*w0; | ||
const double tmp17_1 = A_01_1*w7; | ||
const double tmp18_1 = tmp1_0*w6; | ||
const double tmp19_1 = A_10_3*w2; | ||
const double tmp20_1 = tmp9_0*w4; | ||
const double tmp21_1 = tmp1_0*w11; | ||
const double tmp22_1 = tmp10_0*w7; | ||
const double tmp23_1 = tmp3_0*w14; | ||
const double tmp24_1 = tmp11_0*w13; | ||
const double tmp25_1 = tmp12_0*w12; | ||
const double tmp26_1 = tmp10_0*w4; | ||
const double tmp27_1 = tmp3_0*w11; | ||
const double tmp28_1 = tmp9_0*w7; | ||
const double tmp29_1 = tmp1_0*w14; | ||
const double tmp30_1 = tmp12_0*w13; | ||
const double tmp31_1 = tmp11_0*w12; | ||
const double tmp32_1 = tmp13_0*w2; | ||
const double tmp33_1 = tmp14_0*w5; | ||
const double tmp34_1 = tmp15_0*w8; | ||
const double tmp35_1 = A_01_0*w8; | ||
const double tmp36_1 = tmp16_0*w1; | ||
const double tmp37_1 = A_10_1*w4; | ||
const double tmp38_1 = tmp5_0*w15; | ||
const double tmp39_1 = A_10_2*w7; | ||
const double tmp40_1 = tmp11_0*w17; | ||
const double tmp41_1 = A_01_3*w2; | ||
const double tmp42_1 = tmp12_0*w16; | ||
const double tmp43_1 = tmp17_0*w5; | ||
const double tmp44_1 = tmp7_0*w7; | ||
const double tmp45_1 = tmp6_0*w4; | ||
const double tmp46_1 = A_01_3*w8; | ||
const double tmp47_1 = A_10_2*w4; | ||
const double tmp48_1 = A_10_1*w7; | ||
const double tmp49_1 = tmp12_0*w17; | ||
const double tmp50_1 = A_01_0*w2; | ||
const double tmp51_1 = tmp11_0*w16; | ||
const double tmp52_1 = tmp18_0*w8; | ||
const double tmp53_1 = tmp19_0*w2; | ||
const double tmp54_1 = tmp13_0*w8; | ||
const double tmp55_1 = tmp15_0*w2; | ||
const double tmp56_1 = tmp19_0*w8; | ||
const double tmp57_1 = tmp18_0*w2; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; | ||
const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; | ||
const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; | ||
const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_00*w18; | ||
const double tmp1_1 = A_01*w19; | ||
const double tmp2_1 = A_10*w20; | ||
const double tmp3_1 = A_11*w21; | ||
const double tmp4_1 = A_00*w22; | ||
const double tmp5_1 = tmp0_0*w19; | ||
const double tmp6_1 = A_11*w23; | ||
const double tmp7_1 = A_11*w25; | ||
const double tmp8_1 = A_00*w24; | ||
const double tmp9_1 = tmp0_0*w20; | ||
const double tmp10_1 = A_01*w20; | ||
const double tmp11_1 = A_11*w27; | ||
const double tmp12_1 = A_00*w26; | ||
const double tmp13_1 = A_10*w19; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
if (B.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)]; | ||
const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)]; | ||
const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)]; | ||
const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)]; | ||
const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)]; | ||
const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)]; | ||
const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)]; | ||
const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)]; | ||
const double tmp0_0 = B_1_0 + B_1_1; | ||
const double tmp1_0 = B_1_2 + B_1_3; | ||
const double tmp2_0 = B_0_1 + B_0_3; | ||
const double tmp3_0 = B_0_0 + B_0_2; | ||
const double tmp63_1 = B_1_1*w42; | ||
const double tmp79_1 = B_1_1*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp8_1 = tmp0_0*w32; | ||
const double tmp71_1 = B_0_1*w34; | ||
const double tmp19_1 = B_0_3*w31; | ||
const double tmp15_1 = B_0_3*w34; | ||
const double tmp9_1 = tmp3_0*w34; | ||
const double tmp35_1 = B_1_0*w36; | ||
const double tmp66_1 = B_0_3*w28; | ||
const double tmp28_1 = B_1_0*w42; | ||
const double tmp22_1 = B_1_0*w40; | ||
const double tmp16_1 = B_1_2*w29; | ||
const double tmp6_1 = tmp2_0*w35; | ||
const double tmp55_1 = B_1_3*w40; | ||
const double tmp50_1 = B_1_3*w42; | ||
const double tmp7_1 = tmp1_0*w29; | ||
const double tmp1_1 = tmp1_0*w32; | ||
const double tmp57_1 = B_0_3*w30; | ||
const double tmp18_1 = B_1_1*w32; | ||
const double tmp53_1 = B_1_0*w41; | ||
const double tmp61_1 = B_1_3*w36; | ||
const double tmp27_1 = B_0_3*w38; | ||
const double tmp64_1 = B_0_2*w30; | ||
const double tmp76_1 = B_0_1*w38; | ||
const double tmp39_1 = tmp2_0*w34; | ||
const double tmp62_1 = B_0_1*w31; | ||
const double tmp56_1 = B_0_0*w31; | ||
const double tmp49_1 = B_1_1*w36; | ||
const double tmp2_1 = B_0_2*w31; | ||
const double tmp23_1 = B_0_2*w33; | ||
const double tmp38_1 = B_1_1*w43; | ||
const double tmp74_1 = B_1_2*w41; | ||
const double tmp43_1 = B_1_1*w41; | ||
const double tmp58_1 = B_0_2*w28; | ||
const double tmp67_1 = B_0_0*w33; | ||
const double tmp33_1 = tmp0_0*w39; | ||
const double tmp4_1 = B_0_0*w28; | ||
const double tmp20_1 = B_0_0*w30; | ||
const double tmp13_1 = B_0_2*w38; | ||
const double tmp65_1 = B_1_2*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp41_1 = tmp3_0*w33; | ||
const double tmp73_1 = B_0_2*w37; | ||
const double tmp69_1 = B_0_0*w38; | ||
const double tmp48_1 = B_1_2*w39; | ||
const double tmp59_1 = B_0_1*w33; | ||
const double tmp17_1 = B_1_3*w41; | ||
const double tmp5_1 = B_0_3*w33; | ||
const double tmp3_1 = B_0_1*w30; | ||
const double tmp21_1 = B_0_1*w28; | ||
const double tmp42_1 = B_1_0*w29; | ||
const double tmp54_1 = B_1_2*w32; | ||
const double tmp60_1 = B_1_0*w39; | ||
const double tmp32_1 = tmp1_0*w36; | ||
const double tmp10_1 = B_0_1*w37; | ||
const double tmp14_1 = B_0_0*w35; | ||
const double tmp29_1 = B_0_1*w35; | ||
const double tmp26_1 = B_1_2*w36; | ||
const double tmp30_1 = B_1_3*w43; | ||
const double tmp70_1 = B_0_2*w35; | ||
const double tmp34_1 = B_1_3*w39; | ||
const double tmp51_1 = B_1_0*w43; | ||
const double tmp31_1 = B_0_2*w34; | ||
const double tmp45_1 = tmp3_0*w28; | ||
const double tmp11_1 = tmp1_0*w39; | ||
const double tmp52_1 = B_1_1*w29; | ||
const double tmp44_1 = B_1_3*w32; | ||
const double tmp25_1 = B_1_1*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp72_1 = B_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w28; | ||
const double tmp46_1 = B_1_2*w40; | ||
const double tmp36_1 = B_1_2*w42; | ||
const double tmp24_1 = B_0_0*w37; | ||
const double tmp77_1 = B_0_3*w35; | ||
const double tmp68_1 = B_0_3*w37; | ||
const double tmp78_1 = B_0_0*w34; | ||
const double tmp12_1 = tmp0_0*w36; | ||
const double tmp75_1 = B_1_0*w32; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; | ||
const double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; | ||
const double tmp6_1 = B_1*w50; | ||
const double tmp1_1 = B_1*w45; | ||
const double tmp5_1 = B_1*w49; | ||
const double tmp4_1 = B_1*w48; | ||
const double tmp0_1 = B_0*w44; | ||
const double tmp2_1 = B_0*w46; | ||
const double tmp7_1 = B_0*w51; | ||
const double tmp3_1 = B_0*w47; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
if (C.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)]; | ||
const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)]; | ||
const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)]; | ||
const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)]; | ||
const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)]; | ||
const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)]; | ||
const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)]; | ||
const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)]; | ||
const double tmp2_0 = C_0_1 + C_0_3; | ||
const double tmp1_0 = C_1_2 + C_1_3; | ||
const double tmp3_0 = C_0_0 + C_0_2; | ||
const double tmp0_0 = C_1_0 + C_1_1; | ||
const double tmp64_1 = C_0_2*w30; | ||
const double tmp14_1 = C_0_2*w28; | ||
const double tmp19_1 = C_0_3*w31; | ||
const double tmp22_1 = C_1_0*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp29_1 = C_0_1*w35; | ||
const double tmp73_1 = C_0_2*w37; | ||
const double tmp74_1 = C_1_2*w41; | ||
const double tmp52_1 = C_1_3*w39; | ||
const double tmp25_1 = C_1_1*w39; | ||
const double tmp62_1 = C_0_1*w31; | ||
const double tmp79_1 = C_1_1*w40; | ||
const double tmp43_1 = C_1_1*w36; | ||
const double tmp27_1 = C_0_3*w38; | ||
const double tmp28_1 = C_1_0*w42; | ||
const double tmp63_1 = C_1_1*w42; | ||
const double tmp59_1 = C_0_3*w34; | ||
const double tmp72_1 = C_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w35; | ||
const double tmp13_1 = C_0_3*w30; | ||
const double tmp51_1 = C_1_2*w40; | ||
const double tmp54_1 = C_1_2*w42; | ||
const double tmp12_1 = C_0_0*w31; | ||
const double tmp2_1 = tmp1_0*w32; | ||
const double tmp68_1 = C_0_2*w31; | ||
const double tmp75_1 = C_1_0*w32; | ||
const double tmp49_1 = C_1_1*w41; | ||
const double tmp4_1 = C_0_2*w35; | ||
const double tmp66_1 = C_0_3*w28; | ||
const double tmp56_1 = C_0_1*w37; | ||
const double tmp5_1 = C_0_1*w34; | ||
const double tmp38_1 = tmp2_0*w34; | ||
const double tmp76_1 = C_0_1*w38; | ||
const double tmp21_1 = C_0_1*w28; | ||
const double tmp69_1 = C_0_1*w30; | ||
const double tmp53_1 = C_1_0*w36; | ||
const double tmp42_1 = C_1_2*w39; | ||
const double tmp32_1 = tmp1_0*w29; | ||
const double tmp45_1 = C_1_0*w43; | ||
const double tmp33_1 = tmp0_0*w32; | ||
const double tmp35_1 = C_1_0*w41; | ||
const double tmp26_1 = C_1_2*w36; | ||
const double tmp67_1 = C_0_0*w33; | ||
const double tmp31_1 = C_0_2*w34; | ||
const double tmp20_1 = C_0_0*w30; | ||
const double tmp70_1 = C_0_0*w28; | ||
const double tmp8_1 = tmp0_0*w39; | ||
const double tmp30_1 = C_1_3*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp17_1 = C_1_3*w41; | ||
const double tmp58_1 = C_0_0*w35; | ||
const double tmp9_1 = tmp3_0*w33; | ||
const double tmp61_1 = C_1_3*w36; | ||
const double tmp41_1 = tmp3_0*w34; | ||
const double tmp50_1 = C_1_3*w32; | ||
const double tmp18_1 = C_1_1*w32; | ||
const double tmp6_1 = tmp1_0*w36; | ||
const double tmp3_1 = C_0_0*w38; | ||
const double tmp34_1 = C_1_1*w29; | ||
const double tmp77_1 = C_0_3*w35; | ||
const double tmp65_1 = C_1_2*w43; | ||
const double tmp71_1 = C_0_3*w33; | ||
const double tmp55_1 = C_1_1*w43; | ||
const double tmp46_1 = tmp3_0*w28; | ||
const double tmp24_1 = C_0_0*w37; | ||
const double tmp10_1 = tmp1_0*w39; | ||
const double tmp48_1 = C_1_0*w29; | ||
const double tmp15_1 = C_0_1*w33; | ||
const double tmp36_1 = C_1_2*w32; | ||
const double tmp60_1 = C_1_0*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp16_1 = C_1_2*w29; | ||
const double tmp1_1 = C_0_3*w37; | ||
const double tmp7_1 = tmp2_0*w28; | ||
const double tmp39_1 = C_1_3*w40; | ||
const double tmp44_1 = C_1_3*w42; | ||
const double tmp57_1 = C_0_2*w38; | ||
const double tmp78_1 = C_0_0*w34; | ||
const double tmp11_1 = tmp0_0*w36; | ||
const double tmp23_1 = C_0_2*w33; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; | ||
const double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; | ||
const double tmp1_1 = C_1*w45; | ||
const double tmp3_1 = C_0*w51; | ||
const double tmp4_1 = C_0*w44; | ||
const double tmp7_1 = C_0*w46; | ||
const double tmp5_1 = C_1*w49; | ||
const double tmp2_1 = C_1*w48; | ||
const double tmp0_1 = C_0*w47; | ||
const double tmp6_1 = C_1*w50; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp4_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
if (D.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double D_0 = D_p[INDEX3(k, m, 0, numEq, numComp)]; | ||
const double D_1 = D_p[INDEX3(k, m, 1, numEq, numComp)]; | ||
const double D_2 = D_p[INDEX3(k, m, 2, numEq, numComp)]; | ||
const double D_3 = D_p[INDEX3(k, m, 3, numEq, numComp)]; | ||
const double tmp4_0 = D_1 + D_3; | ||
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; | ||
const double tmp5_0 = D_0 + D_2; | ||
const double tmp0_0 = D_0 + D_1; | ||
const double tmp6_0 = D_0 + D_3; | ||
const double tmp1_0 = D_2 + D_3; | ||
const double tmp3_0 = D_1 + D_2; | ||
const double tmp16_1 = D_1*w56; | ||
const double tmp14_1 = tmp6_0*w54; | ||
const double tmp8_1 = D_3*w55; | ||
const double tmp2_1 = tmp2_0*w54; | ||
const double tmp12_1 = tmp5_0*w52; | ||
const double tmp4_1 = tmp0_0*w53; | ||
const double tmp3_1 = tmp1_0*w52; | ||
const double tmp13_1 = tmp4_0*w53; | ||
const double tmp10_1 = tmp4_0*w52; | ||
const double tmp1_1 = tmp1_0*w53; | ||
const double tmp7_1 = D_3*w56; | ||
const double tmp0_1 = tmp0_0*w52; | ||
const double tmp11_1 = tmp5_0*w53; | ||
const double tmp9_1 = D_0*w56; | ||
const double tmp5_1 = tmp3_0*w54; | ||
const double tmp18_1 = D_2*w56; | ||
const double tmp17_1 = D_1*w55; | ||
const double tmp6_1 = D_0*w55; | ||
const double tmp15_1 = D_2*w55; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double D_0 = D_p[INDEX2(k, m, numEq)]; | ||
const double tmp2_1 = D_0*w59; | ||
const double tmp1_1 = D_0*w58; | ||
const double tmp0_1 = D_0*w57; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp2_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
if (X.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
const double X_0_0 = X_p[INDEX3(k, 0, 0, numEq, 2)]; | ||
const double X_1_0 = X_p[INDEX3(k, 1, 0, numEq, 2)]; | ||
const double X_0_1 = X_p[INDEX3(k, 0, 1, numEq, 2)]; | ||
const double X_1_1 = X_p[INDEX3(k, 1, 1, numEq, 2)]; | ||
const double X_0_2 = X_p[INDEX3(k, 0, 2, numEq, 2)]; | ||
const double X_1_2 = X_p[INDEX3(k, 1, 2, numEq, 2)]; | ||
const double X_0_3 = X_p[INDEX3(k, 0, 3, numEq, 2)]; | ||
const double X_1_3 = X_p[INDEX3(k, 1, 3, numEq, 2)]; | ||
const double tmp1_0 = X_1_1 + X_1_3; | ||
const double tmp3_0 = X_0_0 + X_0_1; | ||
const double tmp2_0 = X_1_0 + X_1_2; | ||
const double tmp0_0 = X_0_2 + X_0_3; | ||
const double tmp8_1 = tmp2_0*w66; | ||
const double tmp5_1 = tmp3_0*w64; | ||
const double tmp14_1 = tmp0_0*w64; | ||
const double tmp3_1 = tmp3_0*w60; | ||
const double tmp9_1 = tmp3_0*w63; | ||
const double tmp13_1 = tmp3_0*w65; | ||
const double tmp12_1 = tmp1_0*w66; | ||
const double tmp10_1 = tmp0_0*w60; | ||
const double tmp2_1 = tmp2_0*w61; | ||
const double tmp6_1 = tmp2_0*w62; | ||
const double tmp4_1 = tmp0_0*w65; | ||
const double tmp11_1 = tmp1_0*w67; | ||
const double tmp1_1 = tmp1_0*w62; | ||
const double tmp7_1 = tmp1_0*w61; | ||
const double tmp0_1 = tmp0_0*w63; | ||
  |