Parent Directory
|
Revision Log
|
Patch
revision 4013 by caltinay, Thu Oct 4 03:13:27 2012 UTC | revision 4765 by sshaw, Wed Mar 19 00:17:16 2014 UTC | |
---|---|---|
# | Line 1 | Line 1 |
1 | ||
2 | /***************************************************************************** | /***************************************************************************** |
3 | * | * |
4 | * Copyright (c) 2003-2012 by University of Queensland | * Copyright (c) 2003-2014 by University of Queensland |
5 | * http://www.uq.edu.au | * http://www.uq.edu.au |
6 | * | * |
7 | * Primary Business: Queensland, Australia | * Primary Business: Queensland, Australia |
# | Line 9 | Line 9 |
9 | * http://www.opensource.org/licenses/osl-3.0.php | * http://www.opensource.org/licenses/osl-3.0.php |
10 | * | * |
11 | * Development until 2012 by Earth Systems Science Computational Center (ESSCC) | * Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 | * Development since 2012 by School of Earth Sciences | * Development 2012-2013 by School of Earth Sciences |
13 | * Development from 2014 by Centre for Geoscience Computing (GeoComp) | |
14 | * | * |
15 | *****************************************************************************/ | *****************************************************************************/ |
16 | ||
17 | #include <algorithm> | |
18 | ||
19 | #include <ripley/Rectangle.h> | #include <ripley/Rectangle.h> |
extern "C" { | ||
20 | #include <paso/SystemMatrix.h> | #include <paso/SystemMatrix.h> |
21 | } | #include <esysUtils/esysFileWriter.h> |
22 | #include <ripley/DefaultAssembler2D.h> | |
23 | #include <ripley/WaveAssembler2D.h> | |
24 | #include <ripley/LameAssembler2D.h> | |
25 | #include <ripley/domainhelpers.h> | |
26 | #include <boost/scoped_array.hpp> | |
27 | #include "esysUtils/EsysRandom.h" | |
28 | #include "blocktools.h" | |
29 | ||
30 | #ifdef USE_NETCDF | #ifdef USE_NETCDF |
31 | #include <netcdfcpp.h> | #include <netcdfcpp.h> |
# | Line 32 extern "C" { | Line 41 extern "C" { |
41 | #include <iomanip> | #include <iomanip> |
42 | ||
43 | using namespace std; | using namespace std; |
44 | using esysUtils::FileWriter; | |
45 | ||
46 | namespace ripley { | namespace ripley { |
47 | ||
48 | Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, | Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, |
49 | double y1, int d0, int d1) : | double y1, int d0, int d1, |
50 | RipleyDomain(2), | const std::vector<double>& points, |
51 | m_x0(x0), | const std::vector<int>& tags, |
52 | m_y0(y0), | const simap_t& tagnamestonums) : |
53 | m_l0(x1-x0), | RipleyDomain(2) |
m_l1(y1-y0) | ||
54 | { | { |
55 | // ignore subdivision parameters for serial run | // ignore subdivision parameters for serial run |
56 | if (m_mpiInfo->size == 1) { | if (m_mpiInfo->size == 1) { |
# | Line 50 Rectangle::Rectangle(int n0, int n1, dou | Line 59 Rectangle::Rectangle(int n0, int n1, dou |
59 | } | } |
60 | ||
61 | bool warn=false; | bool warn=false; |
62 | // if number of subdivisions is non-positive, try to subdivide by the same | std::vector<int> factors; |
63 | // ratio as the number of elements | int ranks = m_mpiInfo->size; |
64 | if (d0<=0 && d1<=0) { | int epr[2] = {n0,n1}; |
65 | warn=true; | int d[2] = {d0,d1}; |
66 | d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1)))); | if (d0<=0 || d1<=0) { |
67 | d1=m_mpiInfo->size/d0; | for (int i = 0; i < 2; i++) { |
68 | if (d0*d1 != m_mpiInfo->size) { | if (d[i] < 1) { |
69 | // ratios not the same so subdivide side with more elements only | d[i] = 1; |
70 | if (n0>n1) { | continue; |
d0=0; | ||
d1=1; | ||
} else { | ||
d0=1; | ||
d1=0; | ||
71 | } | } |
72 | } | epr[i] = -1; // can no longer be max |
73 | } | //remove |
74 | if (d0<=0) { | if (ranks % d[i] != 0) { |
75 | // d1 is preset, determine d0 - throw further down if result is no good | throw RipleyException("Invalid number of spatial subdivisions"); |
76 | d0=m_mpiInfo->size/d1; | } |
77 | } else if (d1<=0) { | ranks /= d[i]; |
78 | // d0 is preset, determine d1 - throw further down if result is no good | } |
79 | d1=m_mpiInfo->size/d0; | factorise(factors, ranks); |
80 | if (factors.size() != 0) { | |
81 | warn = true; | |
82 | } | |
83 | } | |
84 | ||
85 | while (factors.size() > 0) { | |
86 | int i = epr[0] > epr[1] ? 0 : 1; | |
87 | int f = factors.back(); | |
88 | factors.pop_back(); | |
89 | d[i] *= f; | |
90 | epr[i] /= f; | |
91 | } | } |
92 | d0 = d[0]; d1 = d[1]; | |
93 | m_NX=d0; | |
m_NY=d1; | ||
94 | // ensure number of subdivisions is valid and nodes can be distributed | // ensure number of subdivisions is valid and nodes can be distributed |
95 | // among number of ranks | // among number of ranks |
96 | if (m_NX*m_NY != m_mpiInfo->size) | if (d0*d1 != m_mpiInfo->size) |
97 | throw RipleyException("Invalid number of spatial subdivisions"); | throw RipleyException("Invalid number of spatial subdivisions"); |
98 | ||
99 | if (warn) { | if (warn) { |
# | Line 88 Rectangle::Rectangle(int n0, int n1, dou | Line 101 Rectangle::Rectangle(int n0, int n1, dou |
101 | << d1 << "). This may not be optimal!" << endl; | << d1 << "). This may not be optimal!" << endl; |
102 | } | } |
103 | ||
104 | if ((n0+1)%m_NX > 0) { | double l0 = x1-x0; |
105 | double Dx=m_l0/n0; | double l1 = y1-y0; |
106 | m_dx[0] = l0/n0; | |
107 | m_dx[1] = l1/n1; | |
108 | ||
109 | if ((n0+1)%d0 > 0) { | |
110 | n0=(int)round((float)(n0+1)/d0+0.5)*d0-1; | n0=(int)round((float)(n0+1)/d0+0.5)*d0-1; |
111 | m_l0=Dx*n0; | l0=m_dx[0]*n0; |
112 | cout << "Warning: Adjusted number of elements and length. N0=" | cout << "Warning: Adjusted number of elements and length. N0=" |
113 | << n0 << ", l0=" << m_l0 << endl; | << n0 << ", l0=" << l0 << endl; |
114 | } | } |
115 | if ((n1+1)%m_NY > 0) { | if ((n1+1)%d1 > 0) { |
double Dy=m_l1/n1; | ||
116 | n1=(int)round((float)(n1+1)/d1+0.5)*d1-1; | n1=(int)round((float)(n1+1)/d1+0.5)*d1-1; |
117 | m_l1=Dy*n1; | l1=m_dx[1]*n1; |
118 | cout << "Warning: Adjusted number of elements and length. N1=" | cout << "Warning: Adjusted number of elements and length. N1=" |
119 | << n1 << ", l1=" << m_l1 << endl; | << n1 << ", l1=" << l1 << endl; |
120 | } | } |
121 | ||
122 | m_gNE0=n0; | if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2)) |
m_gNE1=n1; | ||
if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) | ||
123 | throw RipleyException("Too few elements for the number of ranks"); | throw RipleyException("Too few elements for the number of ranks"); |
124 | ||
125 | m_gNE[0] = n0; | |
126 | m_gNE[1] = n1; | |
127 | m_origin[0] = x0; | |
128 | m_origin[1] = y0; | |
129 | m_length[0] = l0; | |
130 | m_length[1] = l1; | |
131 | m_NX[0] = d0; | |
132 | m_NX[1] = d1; | |
133 | ||
134 | // local number of elements (with and without overlap) | // local number of elements (with and without overlap) |
135 | m_NE0 = m_ownNE0 = (m_NX>1 ? (n0+1)/m_NX : n0); | m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0); |
136 | if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) | if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1) |
137 | m_NE0++; | m_NE[0]++; |
138 | else if (m_NX>1 && m_mpiInfo->rank%m_NX==m_NX-1) | else if (d0>1 && m_mpiInfo->rank%d0==d0-1) |
139 | m_ownNE0--; | m_ownNE[0]--; |
140 | ||
141 | m_NE1 = m_ownNE1 = (m_NY>1 ? (n1+1)/m_NY : n1); | m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1); |
142 | if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) | if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1) |
143 | m_NE1++; | m_NE[1]++; |
144 | else if (m_NY>1 && m_mpiInfo->rank/m_NX==m_NY-1) | else if (d1>1 && m_mpiInfo->rank/d0==d1-1) |
145 | m_ownNE1--; | m_ownNE[1]--; |
146 | ||
147 | // local number of nodes | // local number of nodes |
148 | m_N0 = m_NE0+1; | m_NN[0] = m_NE[0]+1; |
149 | m_N1 = m_NE1+1; | m_NN[1] = m_NE[1]+1; |
150 | ||
151 | // bottom-left node is at (offset0,offset1) in global mesh | // bottom-left node is at (offset0,offset1) in global mesh |
152 | m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); | m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0); |
153 | if (m_offset0 > 0) | if (m_offset[0] > 0) |
154 | m_offset0--; | m_offset[0]--; |
155 | m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); | m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0); |
156 | if (m_offset1 > 0) | if (m_offset[1] > 0) |
157 | m_offset1--; | m_offset[1]--; |
158 | ||
159 | populateSampleIds(); | populateSampleIds(); |
160 | createPattern(); | createPattern(); |
161 | assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN); | |
162 | for (map<string, int>::const_iterator i = tagnamestonums.begin(); | |
163 | i != tagnamestonums.end(); i++) { | |
164 | setTagMap(i->first, i->second); | |
165 | } | |
166 | addPoints(tags.size(), &points[0], &tags[0]); | |
167 | } | } |
168 | ||
169 | Rectangle::~Rectangle() | Rectangle::~Rectangle() |
170 | { | { |
171 | Paso_SystemMatrixPattern_free(m_pattern); | Paso_SystemMatrixPattern_free(m_pattern); |
172 | Paso_Connector_free(m_connector); | Paso_Connector_free(m_connector); |
173 | delete assembler; | |
174 | } | } |
175 | ||
176 | string Rectangle::getDescription() const | string Rectangle::getDescription() const |
# | Line 154 bool Rectangle::operator==(const Abstrac | Line 183 bool Rectangle::operator==(const Abstrac |
183 | const Rectangle* o=dynamic_cast<const Rectangle*>(&other); | const Rectangle* o=dynamic_cast<const Rectangle*>(&other); |
184 | if (o) { | if (o) { |
185 | return (RipleyDomain::operator==(other) && | return (RipleyDomain::operator==(other) && |
186 | m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 | m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1] |
187 | && m_x0==o->m_x0 && m_y0==o->m_y0 | && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1] |
188 | && m_l0==o->m_l0 && m_l1==o->m_l1 | && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1] |
189 | && m_NX==o->m_NX && m_NY==o->m_NY); | && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]); |
190 | } | } |
191 | ||
192 | return false; | return false; |
193 | } | } |
194 | ||
195 | void Rectangle::readNcGrid(escript::Data& out, string filename, string varname, | void Rectangle::readNcGrid(escript::Data& out, string filename, string varname, |
196 | const vector<int>& first, const vector<int>& numValues) const | const ReaderParameters& params) const |
197 | { | { |
198 | #ifdef USE_NETCDF | #ifdef USE_NETCDF |
199 | // check destination function space | // check destination function space |
200 | int myN0, myN1; | int myN0, myN1; |
201 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | if (out.getFunctionSpace().getTypeCode() == Nodes) { |
202 | myN0 = m_N0; | myN0 = m_NN[0]; |
203 | myN1 = m_N1; | myN1 = m_NN[1]; |
204 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | } else if (out.getFunctionSpace().getTypeCode() == Elements || |
205 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | out.getFunctionSpace().getTypeCode() == ReducedElements) { |
206 | myN0 = m_NE0; | myN0 = m_NE[0]; |
207 | myN1 = m_NE1; | myN1 = m_NE[1]; |
208 | } else | } else |
209 | throw RipleyException("readNcGrid(): invalid function space for output data object"); | throw RipleyException("readNcGrid(): invalid function space for output data object"); |
210 | ||
211 | if (first.size() != 2) | if (params.first.size() != 2) |
212 | throw RipleyException("readNcGrid(): argument 'first' must have 2 entries"); | throw RipleyException("readNcGrid(): argument 'first' must have 2 entries"); |
213 | ||
214 | if (numValues.size() != 2) | if (params.numValues.size() != 2) |
215 | throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries"); | throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries"); |
216 | ||
217 | if (params.multiplier.size() != 2) | |
218 | throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries"); | |
219 | for (size_t i=0; i<params.multiplier.size(); i++) | |
220 | if (params.multiplier[i]<1) | |
221 | throw RipleyException("readNcGrid(): all multipliers must be positive"); | |
222 | if (params.reverse.size() != 2) | |
223 | throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries"); | |
224 | ||
225 | // check file existence and size | // check file existence and size |
226 | NcFile f(filename.c_str(), NcFile::ReadOnly); | NcFile f(filename.c_str(), NcFile::ReadOnly); |
227 | if (!f.is_valid()) | if (!f.is_valid()) |
# | Line 200 void Rectangle::readNcGrid(escript::Data | Line 237 void Rectangle::readNcGrid(escript::Data |
237 | throw RipleyException("readNcGrid(): only scalar data supported"); | throw RipleyException("readNcGrid(): only scalar data supported"); |
238 | ||
239 | const int dims = var->num_dims(); | const int dims = var->num_dims(); |
240 | const long *edges = var->edges(); | boost::scoped_array<long> edges(var->edges()); |
241 | ||
242 | // is this a slice of the data object (dims!=2)? | // is this a slice of the data object (dims!=2)? |
243 | // note the expected ordering of edges (as in numpy: y,x) | // note the expected ordering of edges (as in numpy: y,x) |
244 | if ( (dims==2 && (numValues[1] > edges[0] || numValues[0] > edges[1])) | if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1])) |
245 | || (dims==1 && numValues[1]>1) ) { | || (dims==1 && params.numValues[1]>1) ) { |
246 | throw RipleyException("readNcGrid(): not enough data in file"); | throw RipleyException("readNcGrid(): not enough data in file"); |
247 | } | } |
248 | ||
249 | // check if this rank contributes anything | // check if this rank contributes anything |
250 | if (first[0] >= m_offset0+myN0 || first[0]+numValues[0] <= m_offset0 || | if (params.first[0] >= m_offset[0]+myN0 || |
251 | first[1] >= m_offset1+myN1 || first[1]+numValues[1] <= m_offset1) | params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] || |
252 | params.first[1] >= m_offset[1]+myN1 || | |
253 | params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1]) | |
254 | return; | return; |
255 | ||
256 | // now determine how much this rank has to write | // now determine how much this rank has to write |
257 | ||
258 | // first coordinates in data object to write to | // first coordinates in data object to write to |
259 | const int first0 = max(0, first[0]-m_offset0); | const int first0 = max(0, params.first[0]-m_offset[0]); |
260 | const int first1 = max(0, first[1]-m_offset1); | const int first1 = max(0, params.first[1]-m_offset[1]); |
261 | // indices to first value in file | // indices to first value in file (not accounting for reverse yet) |
262 | const int idx0 = max(0, m_offset0-first[0]); | int idx0 = max(0, m_offset[0]-params.first[0]); |
263 | const int idx1 = max(0, m_offset1-first[1]); | int idx1 = max(0, m_offset[1]-params.first[1]); |
264 | // number of values to write | // number of values to read |
265 | const int num0 = min(numValues[0]-idx0, myN0-first0); | const int num0 = min(params.numValues[0]-idx0, myN0-first0); |
266 | const int num1 = min(numValues[1]-idx1, myN1-first1); | const int num1 = min(params.numValues[1]-idx1, myN1-first1); |
267 | ||
268 | // make sure we read the right block if going backwards through file | |
269 | if (params.reverse[0]) | |
270 | idx0 = edges[dims-1]-num0-idx0; | |
271 | if (dims>1 && params.reverse[1]) | |
272 | idx1 = edges[dims-2]-num1-idx1; | |
273 | ||
274 | vector<double> values(num0*num1); | vector<double> values(num0*num1); |
275 | if (dims==2) { | if (dims==2) { |
# | Line 238 void Rectangle::readNcGrid(escript::Data | Line 283 void Rectangle::readNcGrid(escript::Data |
283 | const int dpp = out.getNumDataPointsPerSample(); | const int dpp = out.getNumDataPointsPerSample(); |
284 | out.requireWrite(); | out.requireWrite(); |
285 | ||
286 | // helpers for reversing | |
287 | const int x0 = (params.reverse[0] ? num0-1 : 0); | |
288 | const int x_mult = (params.reverse[0] ? -1 : 1); | |
289 | const int y0 = (params.reverse[1] ? num1-1 : 0); | |
290 | const int y_mult = (params.reverse[1] ? -1 : 1); | |
291 | ||
292 | for (index_t y=0; y<num1; y++) { | for (index_t y=0; y<num1; y++) { |
293 | #pragma omp parallel for | #pragma omp parallel for |
294 | for (index_t x=0; x<num0; x++) { | for (index_t x=0; x<num0; x++) { |
295 | const int dataIndex = (first1+y)*myN0+first0+x; | const int baseIndex = first0+x*params.multiplier[0] |
296 | const int srcIndex=y*num0+x; | +(first1+y*params.multiplier[1])*myN0; |
297 | double* dest = out.getSampleDataRW(dataIndex); | const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x); |
298 | for (index_t q=0; q<dpp; q++) { | if (!isnan(values[srcIndex])) { |
299 | *dest++ = values[srcIndex]; | for (index_t m1=0; m1<params.multiplier[1]; m1++) { |
300 | for (index_t m0=0; m0<params.multiplier[0]; m0++) { | |
301 | const int dataIndex = baseIndex+m0+m1*myN0; | |
302 | double* dest = out.getSampleDataRW(dataIndex); | |
303 | for (index_t q=0; q<dpp; q++) { | |
304 | *dest++ = values[srcIndex]; | |
305 | } | |
306 | } | |
307 | } | |
308 | } | } |
309 | } | } |
310 | } | } |
# | Line 255 void Rectangle::readNcGrid(escript::Data | Line 314 void Rectangle::readNcGrid(escript::Data |
314 | } | } |
315 | ||
316 | void Rectangle::readBinaryGrid(escript::Data& out, string filename, | void Rectangle::readBinaryGrid(escript::Data& out, string filename, |
317 | const vector<int>& first, const vector<int>& numValues) const | const ReaderParameters& params) const |
318 | { | |
319 | // the mapping is not universally correct but should work on our | |
320 | // supported platforms | |
321 | switch (params.dataType) { | |
322 | case DATATYPE_INT32: | |
323 | readBinaryGridImpl<int>(out, filename, params); | |
324 | break; | |
325 | case DATATYPE_FLOAT32: | |
326 | readBinaryGridImpl<float>(out, filename, params); | |
327 | break; | |
328 | case DATATYPE_FLOAT64: | |
329 | readBinaryGridImpl<double>(out, filename, params); | |
330 | break; | |
331 | default: | |
332 | throw RipleyException("readBinaryGrid(): invalid or unsupported datatype"); | |
333 | } | |
334 | } | |
335 | ||
336 | void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename, | |
337 | const ReaderParameters& params) const | |
338 | { | |
339 | // the mapping is not universally correct but should work on our | |
340 | // supported platforms | |
341 | switch (params.dataType) { | |
342 | case DATATYPE_INT32: | |
343 | readBinaryGridZippedImpl<int>(out, filename, params); | |
344 | break; | |
345 | case DATATYPE_FLOAT32: | |
346 | readBinaryGridZippedImpl<float>(out, filename, params); | |
347 | break; | |
348 | case DATATYPE_FLOAT64: | |
349 | readBinaryGridZippedImpl<double>(out, filename, params); | |
350 | break; | |
351 | default: | |
352 | throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype"); | |
353 | } | |
354 | } | |
355 | ||
356 | template<typename ValueType> | |
357 | void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename, | |
358 | const ReaderParameters& params) const | |
359 | { | { |
360 | // check destination function space | // check destination function space |
361 | int myN0, myN1; | int myN0, myN1; |
362 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | if (out.getFunctionSpace().getTypeCode() == Nodes) { |
363 | myN0 = m_N0; | myN0 = m_NN[0]; |
364 | myN1 = m_N1; | myN1 = m_NN[1]; |
365 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | } else if (out.getFunctionSpace().getTypeCode() == Elements || |
366 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | out.getFunctionSpace().getTypeCode() == ReducedElements) { |
367 | myN0 = m_NE0; | myN0 = m_NE[0]; |
368 | myN1 = m_NE1; | myN1 = m_NE[1]; |
369 | } else | } else |
370 | throw RipleyException("readBinaryGrid(): invalid function space for output data object"); | throw RipleyException("readBinaryGrid(): invalid function space for output data object"); |
371 | ||
# | Line 277 void Rectangle::readBinaryGrid(escript:: | Line 377 void Rectangle::readBinaryGrid(escript:: |
377 | f.seekg(0, ios::end); | f.seekg(0, ios::end); |
378 | const int numComp = out.getDataPointSize(); | const int numComp = out.getDataPointSize(); |
379 | const int filesize = f.tellg(); | const int filesize = f.tellg(); |
380 | const int reqsize = numValues[0]*numValues[1]*numComp*sizeof(float); | const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); |
381 | if (filesize < reqsize) { | if (filesize < reqsize) { |
382 | f.close(); | f.close(); |
383 | throw RipleyException("readBinaryGrid(): not enough data in file"); | throw RipleyException("readBinaryGrid(): not enough data in file"); |
384 | } | } |
385 | ||
386 | // check if this rank contributes anything | // check if this rank contributes anything |
387 | if (first[0] >= m_offset0+myN0 || first[0]+numValues[0] <= m_offset0 || | if (params.first[0] >= m_offset[0]+myN0 || |
388 | first[1] >= m_offset1+myN1 || first[1]+numValues[1] <= m_offset1) { | params.first[0]+params.numValues[0] <= m_offset[0] || |
389 | params.first[1] >= m_offset[1]+myN1 || | |
390 | params.first[1]+params.numValues[1] <= m_offset[1]) { | |
391 | f.close(); | f.close(); |
392 | return; | return; |
393 | } | } |
# | Line 293 void Rectangle::readBinaryGrid(escript:: | Line 395 void Rectangle::readBinaryGrid(escript:: |
395 | // now determine how much this rank has to write | // now determine how much this rank has to write |
396 | ||
397 | // first coordinates in data object to write to | // first coordinates in data object to write to |
398 | const int first0 = max(0, first[0]-m_offset0); | const int first0 = max(0, params.first[0]-m_offset[0]); |
399 | const int first1 = max(0, first[1]-m_offset1); | const int first1 = max(0, params.first[1]-m_offset[1]); |
400 | // indices to first value in file | // indices to first value in file |
401 | const int idx0 = max(0, m_offset0-first[0]); | const int idx0 = max(0, m_offset[0]-params.first[0]); |
402 | const int idx1 = max(0, m_offset1-first[1]); | const int idx1 = max(0, m_offset[1]-params.first[1]); |
403 | // number of values to write | // number of values to read |
404 | const int num0 = min(numValues[0]-idx0, myN0-first0); | const int num0 = min(params.numValues[0]-idx0, myN0-first0); |
405 | const int num1 = min(numValues[1]-idx1, myN1-first1); | const int num1 = min(params.numValues[1]-idx1, myN1-first1); |
406 | ||
407 | out.requireWrite(); | out.requireWrite(); |
408 | vector<float> values(num0*numComp); | vector<ValueType> values(num0*numComp); |
409 | const int dpp = out.getNumDataPointsPerSample(); | const int dpp = out.getNumDataPointsPerSample(); |
410 | ||
411 | for (index_t y=0; y<num1; y++) { | for (int y=0; y<num1; y++) { |
412 | const int fileofs = numComp*(idx0+(idx1+y)*numValues[0]); | const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); |
413 | f.seekg(fileofs*sizeof(float)); | f.seekg(fileofs*sizeof(ValueType)); |
414 | f.read((char*)&values[0], num0*numComp*sizeof(float)); | f.read((char*)&values[0], num0*numComp*sizeof(ValueType)); |
415 | for (index_t x=0; x<num0; x++) { | for (int x=0; x<num0; x++) { |
416 | double* dest = out.getSampleDataRW(first0+x+(first1+y)*myN0); | const int baseIndex = first0+x*params.multiplier[0] |
417 | for (index_t c=0; c<numComp; c++) { | +(first1+y*params.multiplier[1])*myN0; |
418 | for (index_t q=0; q<dpp; q++) { | for (int m1=0; m1<params.multiplier[1]; m1++) { |
419 | *dest++ = static_cast<double>(values[x*numComp+c]); | for (int m0=0; m0<params.multiplier[0]; m0++) { |
420 | const int dataIndex = baseIndex+m0+m1*myN0; | |
421 | double* dest = out.getSampleDataRW(dataIndex); | |
422 | for (int c=0; c<numComp; c++) { | |
423 | ValueType val = values[x*numComp+c]; | |
424 | ||
425 | if (params.byteOrder != BYTEORDER_NATIVE) { | |
426 | char* cval = reinterpret_cast<char*>(&val); | |
427 | // this will alter val!! | |
428 | byte_swap32(cval); | |
429 | } | |
430 | if (!std::isnan(val)) { | |
431 | for (int q=0; q<dpp; q++) { | |
432 | *dest++ = static_cast<double>(val); | |
433 | } | |
434 | } | |
435 | } | |
436 | } | |
437 | } | |
438 | } | |
439 | } | |
440 | ||
441 | f.close(); | |
442 | } | |
443 | ||
444 | template<typename ValueType> | |
445 | void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename, | |
446 | const ReaderParameters& params) const | |
447 | { | |
448 | // check destination function space | |
449 | int myN0, myN1; | |
450 | if (out.getFunctionSpace().getTypeCode() == Nodes) { | |
451 | myN0 = m_NN[0]; | |
452 | myN1 = m_NN[1]; | |
453 | } else if (out.getFunctionSpace().getTypeCode() == Elements || | |
454 | out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
455 | myN0 = m_NE[0]; | |
456 | myN1 = m_NE[1]; | |
457 | } else | |
458 | throw RipleyException("readBinaryGrid(): invalid function space for output data object"); | |
459 | ||
460 | // check file existence and size | |
461 | ifstream f(filename.c_str(), ifstream::binary); | |
462 | if (f.fail()) { | |
463 | throw RipleyException("readBinaryGridFromZipped(): cannot open file"); | |
464 | } | |
465 | f.seekg(0, ios::end); | |
466 | const int numComp = out.getDataPointSize(); | |
467 | int filesize = f.tellg(); | |
468 | f.seekg(0, ios::beg); | |
469 | std::vector<char> compressed(filesize); | |
470 | f.read((char*)&compressed[0], filesize); | |
471 | f.close(); | |
472 | std::vector<char> decompressed = unzip(compressed); | |
473 | filesize = decompressed.size(); | |
474 | const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType); | |
475 | if (filesize < reqsize) { | |
476 | throw RipleyException("readBinaryGridFromZipped(): not enough data in file"); | |
477 | } | |
478 | ||
479 | // check if this rank contributes anything | |
480 | if (params.first[0] >= m_offset[0]+myN0 || | |
481 | params.first[0]+params.numValues[0] <= m_offset[0] || | |
482 | params.first[1] >= m_offset[1]+myN1 || | |
483 | params.first[1]+params.numValues[1] <= m_offset[1]) { | |
484 | f.close(); | |
485 | return; | |
486 | } | |
487 | ||
488 | // now determine how much this rank has to write | |
489 | ||
490 | // first coordinates in data object to write to | |
491 | const int first0 = max(0, params.first[0]-m_offset[0]); | |
492 | const int first1 = max(0, params.first[1]-m_offset[1]); | |
493 | // indices to first value in file | |
494 | const int idx0 = max(0, m_offset[0]-params.first[0]); | |
495 | const int idx1 = max(0, m_offset[1]-params.first[1]); | |
496 | // number of values to read | |
497 | const int num0 = min(params.numValues[0]-idx0, myN0-first0); | |
498 | const int num1 = min(params.numValues[1]-idx1, myN1-first1); | |
499 | ||
500 | out.requireWrite(); | |
501 | vector<ValueType> values(num0*numComp); | |
502 | const int dpp = out.getNumDataPointsPerSample(); | |
503 | ||
504 | for (int y=0; y<num1; y++) { | |
505 | const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]); | |
506 | memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType)); | |
507 | for (int x=0; x<num0; x++) { | |
508 | const int baseIndex = first0+x*params.multiplier[0] | |
509 | +(first1+y*params.multiplier[1])*myN0; | |
510 | for (int m1=0; m1<params.multiplier[1]; m1++) { | |
511 | for (int m0=0; m0<params.multiplier[0]; m0++) { | |
512 | const int dataIndex = baseIndex+m0+m1*myN0; | |
513 | double* dest = out.getSampleDataRW(dataIndex); | |
514 | for (int c=0; c<numComp; c++) { | |
515 | ValueType val = values[x*numComp+c]; | |
516 | ||
517 | if (params.byteOrder != BYTEORDER_NATIVE) { | |
518 | char* cval = reinterpret_cast<char*>(&val); | |
519 | // this will alter val!! | |
520 | byte_swap32(cval); | |
521 | } | |
522 | if (!std::isnan(val)) { | |
523 | for (int q=0; q<dpp; q++) { | |
524 | *dest++ = static_cast<double>(val); | |
525 | } | |
526 | } | |
527 | } | |
528 | } | } |
529 | } | } |
530 | } | } |
# | Line 323 void Rectangle::readBinaryGrid(escript:: | Line 533 void Rectangle::readBinaryGrid(escript:: |
533 | f.close(); | f.close(); |
534 | } | } |
535 | ||
536 | void Rectangle::writeBinaryGrid(const escript::Data& in, string filename, | |
537 | int byteOrder, int dataType) const | |
538 | { | |
539 | // the mapping is not universally correct but should work on our | |
540 | // supported platforms | |
541 | switch (dataType) { | |
542 | case DATATYPE_INT32: | |
543 | writeBinaryGridImpl<int>(in, filename, byteOrder); | |
544 | break; | |
545 | case DATATYPE_FLOAT32: | |
546 | writeBinaryGridImpl<float>(in, filename, byteOrder); | |
547 | break; | |
548 | case DATATYPE_FLOAT64: | |
549 | writeBinaryGridImpl<double>(in, filename, byteOrder); | |
550 | break; | |
551 | default: | |
552 | throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype"); | |
553 | } | |
554 | } | |
555 | ||
556 | template<typename ValueType> | |
557 | void Rectangle::writeBinaryGridImpl(const escript::Data& in, | |
558 | const string& filename, int byteOrder) const | |
559 | { | |
560 | // check function space and determine number of points | |
561 | int myN0, myN1; | |
562 | int totalN0, totalN1; | |
563 | if (in.getFunctionSpace().getTypeCode() == Nodes) { | |
564 | myN0 = m_NN[0]; | |
565 | myN1 = m_NN[1]; | |
566 | totalN0 = m_gNE[0]+1; | |
567 | totalN1 = m_gNE[1]+1; | |
568 | } else if (in.getFunctionSpace().getTypeCode() == Elements || | |
569 | in.getFunctionSpace().getTypeCode() == ReducedElements) { | |
570 | myN0 = m_NE[0]; | |
571 | myN1 = m_NE[1]; | |
572 | totalN0 = m_gNE[0]; | |
573 | totalN1 = m_gNE[1]; | |
574 | } else | |
575 | throw RipleyException("writeBinaryGrid(): invalid function space of data object"); | |
576 | ||
577 | const int numComp = in.getDataPointSize(); | |
578 | const int dpp = in.getNumDataPointsPerSample(); | |
579 | ||
580 | if (numComp > 1 || dpp > 1) | |
581 | throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported"); | |
582 | ||
583 | const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1; | |
584 | ||
585 | // from here on we know that each sample consists of one value | |
586 | FileWriter fw; | |
587 | fw.openFile(filename, fileSize); | |
588 | MPIBarrier(); | |
589 | ||
590 | for (index_t y=0; y<myN1; y++) { | |
591 | const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType); | |
592 | ostringstream oss; | |
593 | ||
594 | for (index_t x=0; x<myN0; x++) { | |
595 | const double* sample = in.getSampleDataRO(y*myN0+x); | |
596 | ValueType fvalue = static_cast<ValueType>(*sample); | |
597 | if (byteOrder == BYTEORDER_NATIVE) { | |
598 | oss.write((char*)&fvalue, sizeof(fvalue)); | |
599 | } else { | |
600 | char* value = reinterpret_cast<char*>(&fvalue); | |
601 | oss.write(byte_swap32(value), sizeof(fvalue)); | |
602 | } | |
603 | } | |
604 | fw.writeAt(oss, fileofs); | |
605 | } | |
606 | fw.close(); | |
607 | } | |
608 | ||
609 | void Rectangle::dump(const string& fileName) const | void Rectangle::dump(const string& fileName) const |
610 | { | { |
611 | #if USE_SILO | #if USE_SILO |
# | Line 385 void Rectangle::dump(const string& fileN | Line 668 void Rectangle::dump(const string& fileN |
668 | } | } |
669 | */ | */ |
670 | ||
671 | boost::scoped_ptr<double> x(new double[m_N0]); | boost::scoped_ptr<double> x(new double[m_NN[0]]); |
672 | boost::scoped_ptr<double> y(new double[m_N1]); | boost::scoped_ptr<double> y(new double[m_NN[1]]); |
673 | double* coords[2] = { x.get(), y.get() }; | double* coords[2] = { x.get(), y.get() }; |
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
674 | #pragma omp parallel | #pragma omp parallel |
675 | { | { |
676 | #pragma omp for nowait | #pragma omp for nowait |
677 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_NN[0]; i0++) { |
678 | coords[0][i0]=xdx.first+i0*xdx.second; | coords[0][i0]=getLocalCoordinate(i0, 0); |
679 | } | } |
680 | #pragma omp for nowait | #pragma omp for nowait |
681 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_NN[1]; i1++) { |
682 | coords[1][i1]=ydy.first+i1*ydy.second; | coords[1][i1]=getLocalCoordinate(i1, 1); |
683 | } | } |
684 | } | } |
685 | IndexVector dims = getNumNodesPerDim(); | int* dims = const_cast<int*>(getNumNodesPerDim()); |
686 | ||
687 | // write mesh | // write mesh |
688 | DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE, | DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE, |
689 | DB_COLLINEAR, NULL); | DB_COLLINEAR, NULL); |
690 | ||
691 | // write node ids | // write node ids |
692 | DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2, | DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2, |
693 | NULL, 0, DB_INT, DB_NODECENT, NULL); | NULL, 0, DB_INT, DB_NODECENT, NULL); |
694 | ||
695 | // write element ids | // write element ids |
696 | dims = getNumElementsPerDim(); | dims = const_cast<int*>(getNumElementsPerDim()); |
697 | DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], | DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], |
698 | &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); | dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); |
699 | ||
700 | // rank 0 writes multimesh and multivar | // rank 0 writes multimesh and multivar |
701 | if (m_mpiInfo->rank == 0) { | if (m_mpiInfo->rank == 0) { |
# | Line 482 const int* Rectangle::borrowSampleRefere | Line 763 const int* Rectangle::borrowSampleRefere |
763 | case FaceElements: | case FaceElements: |
764 | case ReducedFaceElements: | case ReducedFaceElements: |
765 | return &m_faceId[0]; | return &m_faceId[0]; |
766 | case Points: | |
767 | return &m_diracPointNodeIDs[0]; | |
768 | default: | default: |
769 | break; | break; |
770 | } | } |
# | Line 506 bool Rectangle::ownSample(int fsType, in | Line 789 bool Rectangle::ownSample(int fsType, in |
789 | case Elements: | case Elements: |
790 | case ReducedElements: | case ReducedElements: |
791 | // check ownership of element's bottom left node | // check ownership of element's bottom left node |
792 | return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); | return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF()); |
793 | case FaceElements: | case FaceElements: |
794 | case ReducedFaceElements: | case ReducedFaceElements: |
795 | { | { |
796 | // determine which face the sample belongs to before | // determine which face the sample belongs to before |
797 | // checking ownership of corresponding element's first node | // checking ownership of corresponding element's first node |
const IndexVector faces = getNumFacesPerBoundary(); | ||
798 | dim_t n=0; | dim_t n=0; |
799 | for (size_t i=0; i<faces.size(); i++) { | for (size_t i=0; i<4; i++) { |
800 | n+=faces[i]; | n+=m_faceCount[i]; |
801 | if (id<n) { | if (id<n) { |
802 | index_t k; | index_t k; |
803 | if (i==1) | if (i==1) |
804 | k=m_N0-2; | k=m_NN[0]-2; |
805 | else if (i==3) | else if (i==3) |
806 | k=m_N0*(m_N1-2); | k=m_NN[0]*(m_NN[1]-2); |
807 | else | else |
808 | k=0; | k=0; |
809 | // determine whether to move right or up | // determine whether to move right or up |
810 | const index_t delta=(i/2==0 ? m_N0 : 1); | const index_t delta=(i/2==0 ? m_NN[0] : 1); |
811 | return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); | return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF()); |
812 | } | } |
813 | } | } |
814 | return false; | return false; |
# | Line 548 void Rectangle::setToNormal(escript::Dat | Line 830 void Rectangle::setToNormal(escript::Dat |
830 | { | { |
831 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
832 | #pragma omp for nowait | #pragma omp for nowait |
833 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
834 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
835 | // set vector at two quadrature points | // set vector at two quadrature points |
836 | *o++ = -1.; | *o++ = -1.; |
# | Line 560 void Rectangle::setToNormal(escript::Dat | Line 842 void Rectangle::setToNormal(escript::Dat |
842 | ||
843 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
844 | #pragma omp for nowait | #pragma omp for nowait |
845 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
846 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
847 | // set vector at two quadrature points | // set vector at two quadrature points |
848 | *o++ = 1.; | *o++ = 1.; |
# | Line 572 void Rectangle::setToNormal(escript::Dat | Line 854 void Rectangle::setToNormal(escript::Dat |
854 | ||
855 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
856 | #pragma omp for nowait | #pragma omp for nowait |
857 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
858 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
859 | // set vector at two quadrature points | // set vector at two quadrature points |
860 | *o++ = 0.; | *o++ = 0.; |
# | Line 584 void Rectangle::setToNormal(escript::Dat | Line 866 void Rectangle::setToNormal(escript::Dat |
866 | ||
867 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
868 | #pragma omp for nowait | #pragma omp for nowait |
869 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
870 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
871 | // set vector at two quadrature points | // set vector at two quadrature points |
872 | *o++ = 0.; | *o++ = 0.; |
# | Line 600 void Rectangle::setToNormal(escript::Dat | Line 882 void Rectangle::setToNormal(escript::Dat |
882 | { | { |
883 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
884 | #pragma omp for nowait | #pragma omp for nowait |
885 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
886 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
887 | *o++ = -1.; | *o++ = -1.; |
888 | *o = 0.; | *o = 0.; |
# | Line 609 void Rectangle::setToNormal(escript::Dat | Line 891 void Rectangle::setToNormal(escript::Dat |
891 | ||
892 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
893 | #pragma omp for nowait | #pragma omp for nowait |
894 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
895 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
896 | *o++ = 1.; | *o++ = 1.; |
897 | *o = 0.; | *o = 0.; |
# | Line 618 void Rectangle::setToNormal(escript::Dat | Line 900 void Rectangle::setToNormal(escript::Dat |
900 | ||
901 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
902 | #pragma omp for nowait | #pragma omp for nowait |
903 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
904 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
905 | *o++ = 0.; | *o++ = 0.; |
906 | *o = -1.; | *o = -1.; |
# | Line 627 void Rectangle::setToNormal(escript::Dat | Line 909 void Rectangle::setToNormal(escript::Dat |
909 | ||
910 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
911 | #pragma omp for nowait | #pragma omp for nowait |
912 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
913 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
914 | *o++ = 0.; | *o++ = 0.; |
915 | *o = 1.; | *o = 1.; |
# | Line 649 void Rectangle::setToSize(escript::Data& | Line 931 void Rectangle::setToSize(escript::Data& |
931 | || out.getFunctionSpace().getTypeCode() == ReducedElements) { | || out.getFunctionSpace().getTypeCode() == ReducedElements) { |
932 | out.requireWrite(); | out.requireWrite(); |
933 | const dim_t numQuad=out.getNumDataPointsPerSample(); | const dim_t numQuad=out.getNumDataPointsPerSample(); |
934 | const double hSize=getFirstCoordAndSpacing(0).second; | const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]); |
const double vSize=getFirstCoordAndSpacing(1).second; | ||
const double size=sqrt(hSize*hSize+vSize*vSize); | ||
935 | #pragma omp parallel for | #pragma omp parallel for |
936 | for (index_t k = 0; k < getNumElements(); ++k) { | for (index_t k = 0; k < getNumElements(); ++k) { |
937 | double* o = out.getSampleDataRW(k); | double* o = out.getSampleDataRW(k); |
# | Line 661 void Rectangle::setToSize(escript::Data& | Line 941 void Rectangle::setToSize(escript::Data& |
941 | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
942 | out.requireWrite(); | out.requireWrite(); |
943 | const dim_t numQuad=out.getNumDataPointsPerSample(); | const dim_t numQuad=out.getNumDataPointsPerSample(); |
const double hSize=getFirstCoordAndSpacing(0).second; | ||
const double vSize=getFirstCoordAndSpacing(1).second; | ||
944 | #pragma omp parallel | #pragma omp parallel |
945 | { | { |
946 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
947 | #pragma omp for nowait | #pragma omp for nowait |
948 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
949 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
950 | fill(o, o+numQuad, vSize); | fill(o, o+numQuad, m_dx[1]); |
951 | } | } |
952 | } | } |
953 | ||
954 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
955 | #pragma omp for nowait | #pragma omp for nowait |
956 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = 0; k1 < m_NE[1]; ++k1) { |
957 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
958 | fill(o, o+numQuad, vSize); | fill(o, o+numQuad, m_dx[1]); |
959 | } | } |
960 | } | } |
961 | ||
962 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
963 | #pragma omp for nowait | #pragma omp for nowait |
964 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
965 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
966 | fill(o, o+numQuad, hSize); | fill(o, o+numQuad, m_dx[0]); |
967 | } | } |
968 | } | } |
969 | ||
970 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
971 | #pragma omp for nowait | #pragma omp for nowait |
972 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = 0; k0 < m_NE[0]; ++k0) { |
973 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
974 | fill(o, o+numQuad, hSize); | fill(o, o+numQuad, m_dx[0]); |
975 | } | } |
976 | } | } |
977 | } // end of parallel section | } // end of parallel section |
# | Line 706 void Rectangle::setToSize(escript::Data& | Line 984 void Rectangle::setToSize(escript::Data& |
984 | } | } |
985 | } | } |
986 | ||
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, | ||
bool reducedColOrder) const | ||
{ | ||
/* FIXME: reduced | ||
if (reducedRowOrder || reducedColOrder) | ||
throw RipleyException("getPattern() not implemented for reduced order"); | ||
*/ | ||
return m_pattern; | ||
} | ||
987 | void Rectangle::Print_Mesh_Info(const bool full) const | void Rectangle::Print_Mesh_Info(const bool full) const |
988 | { | { |
989 | RipleyDomain::Print_Mesh_Info(full); | RipleyDomain::Print_Mesh_Info(full); |
# | Line 723 void Rectangle::Print_Mesh_Info(const bo | Line 991 void Rectangle::Print_Mesh_Info(const bo |
991 | cout << " Id Coordinates" << endl; | cout << " Id Coordinates" << endl; |
992 | cout.precision(15); | cout.precision(15); |
993 | cout.setf(ios::scientific, ios::floatfield); | cout.setf(ios::scientific, ios::floatfield); |
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
994 | for (index_t i=0; i < getNumNodes(); i++) { | for (index_t i=0; i < getNumNodes(); i++) { |
995 | cout << " " << setw(5) << m_nodeId[i] | cout << " " << setw(5) << m_nodeId[i] |
996 | << " " << xdx.first+(i%m_N0)*xdx.second | << " " << getLocalCoordinate(i%m_NN[0], 0) |
997 | << " " << ydy.first+(i/m_N0)*ydy.second << endl; | << " " << getLocalCoordinate(i/m_NN[0], 1) << endl; |
998 | } | } |
999 | } | } |
1000 | } | } |
1001 | ||
IndexVector Rectangle::getNumNodesPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_N0); | ||
ret.push_back(m_N1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumElementsPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_NE0); | ||
ret.push_back(m_NE1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumFacesPerBoundary() const | ||
{ | ||
IndexVector ret(4, 0); | ||
//left | ||
if (m_offset0==0) | ||
ret[0]=m_NE1; | ||
//right | ||
if (m_mpiInfo->rank%m_NX==m_NX-1) | ||
ret[1]=m_NE1; | ||
//bottom | ||
if (m_offset1==0) | ||
ret[2]=m_NE0; | ||
//top | ||
if (m_mpiInfo->rank/m_NX==m_NY-1) | ||
ret[3]=m_NE0; | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumSubdivisionsPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_NX); | ||
ret.push_back(m_NY); | ||
return ret; | ||
} | ||
pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const | ||
{ | ||
if (dim==0) { | ||
return pair<double,double>(m_x0+(m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); | ||
} else if (dim==1) { | ||
return pair<double,double>(m_y0+(m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); | ||
} | ||
throw RipleyException("getFirstCoordAndSpacing: invalid argument"); | ||
} | ||
//protected | ||
dim_t Rectangle::getNumDOF() const | ||
{ | ||
return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; | ||
} | ||
//protected | ||
dim_t Rectangle::getNumFaceElements() const | ||
{ | ||
const IndexVector faces = getNumFacesPerBoundary(); | ||
dim_t n=0; | ||
for (size_t i=0; i<faces.size(); i++) | ||
n+=faces[i]; | ||
return n; | ||
} | ||
1002 | ||
1003 | //protected | //protected |
1004 | void Rectangle::assembleCoordinates(escript::Data& arg) const | void Rectangle::assembleCoordinates(escript::Data& arg) const |
# | Line 811 void Rectangle::assembleCoordinates(escr | Line 1010 void Rectangle::assembleCoordinates(escr |
1010 | if (!numSamplesEqual(&x, 1, getNumNodes())) | if (!numSamplesEqual(&x, 1, getNumNodes())) |
1011 | throw RipleyException("setToX: Illegal number of samples in Data object"); | throw RipleyException("setToX: Illegal number of samples in Data object"); |
1012 | ||
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
1013 | arg.requireWrite(); | arg.requireWrite(); |
1014 | #pragma omp parallel for | #pragma omp parallel for |
1015 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_NN[1]; i1++) { |
1016 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_NN[0]; i0++) { |
1017 | double* point = arg.getSampleDataRW(i0+m_N0*i1); | double* point = arg.getSampleDataRW(i0+m_NN[0]*i1); |
1018 | point[0] = xdx.first+i0*xdx.second; | point[0] = getLocalCoordinate(i0, 0); |
1019 | point[1] = ydy.first+i1*ydy.second; | point[1] = getLocalCoordinate(i1, 1); |
1020 | } | } |
1021 | } | } |
1022 | } | } |
1023 | ||
1024 | //protected | //protected |
1025 | void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const | void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const |
1026 | { | { |
1027 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1028 | const double h0 = m_l0/m_gNE0; | const double cx0 = .21132486540518711775/m_dx[0]; |
1029 | const double h1 = m_l1/m_gNE1; | const double cx1 = .78867513459481288225/m_dx[0]; |
1030 | const double cx0 = -1./h0; | const double cx2 = 1./m_dx[0]; |
1031 | const double cx1 = -.78867513459481288225/h0; | const double cy0 = .21132486540518711775/m_dx[1]; |
1032 | const double cx2 = -.5/h0; | const double cy1 = .78867513459481288225/m_dx[1]; |
1033 | const double cx3 = -.21132486540518711775/h0; | const double cy2 = 1./m_dx[1]; |
const double cx4 = .21132486540518711775/h0; | ||
const double cx5 = .5/h0; | ||
const double cx6 = .78867513459481288225/h0; | ||
const double cx7 = 1./h0; | ||
const double cy0 = -1./h1; | ||
const double cy1 = -.78867513459481288225/h1; | ||
const double cy2 = -.5/h1; | ||
const double cy3 = -.21132486540518711775/h1; | ||
const double cy4 = .21132486540518711775/h1; | ||
const double cy5 = .5/h1; | ||
const double cy6 = .78867513459481288225/h1; | ||
const double cy7 = 1./h1; | ||
1034 | ||
1035 | if (out.getFunctionSpace().getTypeCode() == Elements) { | if (out.getFunctionSpace().getTypeCode() == Elements) { |
1036 | out.requireWrite(); | out.requireWrite(); |
# | Line 856 void Rectangle::assembleGradient(escript | Line 1041 void Rectangle::assembleGradient(escript |
1041 | vector<double> f_10(numComp); | vector<double> f_10(numComp); |
1042 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1043 | #pragma omp for | #pragma omp for |
1044 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1045 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1046 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1047 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1048 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1049 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1050 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1051 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1052 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1053 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1054 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1055 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1056 | o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1057 | o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1058 | o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1059 | o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1060 | } // end of component loop i | } // end of component loop i |
1061 | } // end of k0 loop | } // end of k0 loop |
1062 | } // end of k1 loop | } // end of k1 loop |
# | Line 885 void Rectangle::assembleGradient(escript | Line 1070 void Rectangle::assembleGradient(escript |
1070 | vector<double> f_10(numComp); | vector<double> f_10(numComp); |
1071 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1072 | #pragma omp for | #pragma omp for |
1073 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1074 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1075 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1076 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1077 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1078 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1079 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1080 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1081 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1082 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1083 | } // end of component loop i | } // end of component loop i |
1084 | } // end of k0 loop | } // end of k0 loop |
1085 | } // end of k1 loop | } // end of k1 loop |
# | Line 909 void Rectangle::assembleGradient(escript | Line 1094 void Rectangle::assembleGradient(escript |
1094 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1095 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1096 | #pragma omp for nowait | #pragma omp for nowait |
1097 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1098 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1099 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1100 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double)); |
1101 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1102 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1103 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1104 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1105 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1106 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1107 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1108 | } // end of component loop i | } // end of component loop i |
1109 | } // end of k1 loop | } // end of k1 loop |
1110 | } // end of face 0 | } // end of face 0 |
1111 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1112 | #pragma omp for nowait | #pragma omp for nowait |
1113 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1114 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double)); |
1115 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double)); |
1116 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1117 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1118 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1119 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1120 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0; |
1121 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1122 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1; |
1123 | o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1124 | } // end of component loop i | } // end of component loop i |
1125 | } // end of k1 loop | } // end of k1 loop |
1126 | } // end of face 1 | } // end of face 1 |
1127 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1128 | #pragma omp for nowait | #pragma omp for nowait |
1129 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1130 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1131 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double)); |
1132 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1133 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1134 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1135 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1136 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1137 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1138 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1139 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1140 | } // end of component loop i | } // end of component loop i |
1141 | } // end of k0 loop | } // end of k0 loop |
1142 | } // end of face 2 | } // end of face 2 |
1143 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1144 | #pragma omp for nowait | #pragma omp for nowait |
1145 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1146 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1147 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1148 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1149 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1150 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1151 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1152 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1153 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0; |
1154 | o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1155 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1; |
1156 | } // end of component loop i | } // end of component loop i |
1157 | } // end of k0 loop | } // end of k0 loop |
1158 | } // end of face 3 | } // end of face 3 |
# | Line 983 void Rectangle::assembleGradient(escript | Line 1168 void Rectangle::assembleGradient(escript |
1168 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1169 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1170 | #pragma omp for nowait | #pragma omp for nowait |
1171 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1172 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1173 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1174 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double)); |
1175 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1176 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1177 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1178 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1179 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2; |
1180 | } // end of component loop i | } // end of component loop i |
1181 | } // end of k1 loop | } // end of k1 loop |
1182 | } // end of face 0 | } // end of face 0 |
1183 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1184 | #pragma omp for nowait | #pragma omp for nowait |
1185 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1186 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double)); |
1187 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double)); |
1188 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1189 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1190 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1191 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1192 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2; |
1193 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2; |
1194 | } // end of component loop i | } // end of component loop i |
1195 | } // end of k1 loop | } // end of k1 loop |
1196 | } // end of face 1 | } // end of face 1 |
1197 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1198 | #pragma omp for nowait | #pragma omp for nowait |
1199 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1200 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1201 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double)); |
1202 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1203 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double)); |
1204 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1205 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1206 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2; |
1207 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1208 | } // end of component loop i | } // end of component loop i |
1209 | } // end of k0 loop | } // end of k0 loop |
1210 | } // end of face 2 | } // end of face 2 |
1211 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1212 | #pragma omp for nowait | #pragma omp for nowait |
1213 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1214 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1215 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1216 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double)); |
1217 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1218 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1219 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1220 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2; |
1221 | o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]); | o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2; |
1222 | } // end of component loop i | } // end of component loop i |
1223 | } // end of k0 loop | } // end of k0 loop |
1224 | } // end of face 3 | } // end of face 3 |
# | Line 1042 void Rectangle::assembleGradient(escript | Line 1227 void Rectangle::assembleGradient(escript |
1227 | } | } |
1228 | ||
1229 | //protected | //protected |
1230 | void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const | void Rectangle::assembleIntegrate(vector<double>& integrals, |
1231 | const escript::Data& arg) const | |
1232 | { | { |
1233 | const dim_t numComp = arg.getDataPointSize(); | const dim_t numComp = arg.getDataPointSize(); |
1234 | const double h0 = m_l0/m_gNE0; | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1235 | const double h1 = m_l1/m_gNE1; | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
const index_t left = (m_offset0==0 ? 0 : 1); | ||
const index_t bottom = (m_offset1==0 ? 0 : 1); | ||
1236 | const int fs=arg.getFunctionSpace().getTypeCode(); | const int fs=arg.getFunctionSpace().getTypeCode(); |
1237 | if (fs == Elements && arg.actsExpanded()) { | if (fs == Elements && arg.actsExpanded()) { |
1238 | #pragma omp parallel | #pragma omp parallel |
1239 | { | { |
1240 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1241 | const double w = h0*h1/4.; | const double w = m_dx[0]*m_dx[1]/4.; |
1242 | #pragma omp for nowait | #pragma omp for nowait |
1243 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1244 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1245 | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0])); |
1246 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1247 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
1248 | const double f1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
# | Line 1074 void Rectangle::assembleIntegrate(vector | Line 1258 void Rectangle::assembleIntegrate(vector |
1258 | } // end of parallel section | } // end of parallel section |
1259 | ||
1260 | } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) { | } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) { |
1261 | const double w = h0*h1; | const double w = m_dx[0]*m_dx[1]; |
1262 | #pragma omp parallel | #pragma omp parallel |
1263 | { | { |
1264 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1265 | #pragma omp for nowait | #pragma omp for nowait |
1266 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1267 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1268 | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0])); |
1269 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1270 | int_local[i]+=f[i]*w; | int_local[i]+=f[i]*w; |
1271 | } | } |
# | Line 1096 void Rectangle::assembleIntegrate(vector | Line 1280 void Rectangle::assembleIntegrate(vector |
1280 | #pragma omp parallel | #pragma omp parallel |
1281 | { | { |
1282 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1283 | const double w0 = h0/2.; | const double w0 = m_dx[0]/2.; |
1284 | const double w1 = h1/2.; | const double w1 = m_dx[1]/2.; |
1285 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1286 | #pragma omp for nowait | #pragma omp for nowait |
1287 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1288 | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
1289 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1290 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
# | Line 1112 void Rectangle::assembleIntegrate(vector | Line 1296 void Rectangle::assembleIntegrate(vector |
1296 | ||
1297 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1298 | #pragma omp for nowait | #pragma omp for nowait |
1299 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1300 | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
1301 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1302 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
# | Line 1124 void Rectangle::assembleIntegrate(vector | Line 1308 void Rectangle::assembleIntegrate(vector |
1308 | ||
1309 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1310 | #pragma omp for nowait | #pragma omp for nowait |
1311 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1312 | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
1313 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1314 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
# | Line 1136 void Rectangle::assembleIntegrate(vector | Line 1320 void Rectangle::assembleIntegrate(vector |
1320 | ||
1321 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1322 | #pragma omp for nowait | #pragma omp for nowait |
1323 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1324 | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
1325 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1326 | const double f0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
# | Line 1156 void Rectangle::assembleIntegrate(vector | Line 1340 void Rectangle::assembleIntegrate(vector |
1340 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
1341 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1342 | #pragma omp for nowait | #pragma omp for nowait |
1343 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1344 | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
1345 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1346 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*m_dx[1]; |
1347 | } | } |
1348 | } | } |
1349 | } | } |
1350 | ||
1351 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1352 | #pragma omp for nowait | #pragma omp for nowait |
1353 | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) { |
1354 | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
1355 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1356 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*m_dx[1]; |
1357 | } | } |
1358 | } | } |
1359 | } | } |
1360 | ||
1361 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1362 | #pragma omp for nowait | #pragma omp for nowait |
1363 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1364 | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
1365 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1366 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*m_dx[0]; |
1367 | } | } |
1368 | } | } |
1369 | } | } |
1370 | ||
1371 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1372 | #pragma omp for nowait | #pragma omp for nowait |
1373 | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) { |
1374 | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
1375 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1376 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*m_dx[0]; |
1377 | } | } |
1378 | } | } |
1379 | } | } |
# | Line 1204 void Rectangle::assembleIntegrate(vector | Line 1388 void Rectangle::assembleIntegrate(vector |
1388 | //protected | //protected |
1389 | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const |
1390 | { | { |
1391 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1392 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1393 | const int x=node%nDOF0; | const int x=node%nDOF0; |
1394 | const int y=node/nDOF0; | const int y=node/nDOF0; |
1395 | dim_t num=0; | dim_t num=0; |
# | Line 1230 dim_t Rectangle::insertNeighbourNodes(In | Line 1414 dim_t Rectangle::insertNeighbourNodes(In |
1414 | } | } |
1415 | ||
1416 | //protected | //protected |
1417 | void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const | void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const |
1418 | { | { |
1419 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1420 | out.requireWrite(); | out.requireWrite(); |
1421 | ||
1422 | const index_t left = (m_offset0==0 ? 0 : 1); | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1423 | const index_t bottom = (m_offset1==0 ? 0 : 1); | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
1424 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1425 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1426 | #pragma omp parallel for | #pragma omp parallel for |
1427 | for (index_t i=0; i<nDOF1; i++) { | for (index_t i=0; i<nDOF1; i++) { |
1428 | for (index_t j=0; j<nDOF0; j++) { | for (index_t j=0; j<nDOF0; j++) { |
1429 | const index_t n=j+left+(i+bottom)*m_N0; | const index_t n=j+left+(i+bottom)*m_NN[0]; |
1430 | const double* src=in.getSampleDataRO(n); | const double* src=in.getSampleDataRO(n); |
1431 | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); |
1432 | } | } |
# | Line 1250 void Rectangle::nodesToDOF(escript::Data | Line 1434 void Rectangle::nodesToDOF(escript::Data |
1434 | } | } |
1435 | ||
1436 | //protected | //protected |
1437 | void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const | void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const |
1438 | { | { |
1439 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1440 | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
1441 | in.requireWrite(); | // expand data object if necessary to be able to grab the whole data |
1442 | Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); | const_cast<escript::Data*>(&in)->expand(); |
1443 | Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0)); | |
1444 | ||
1445 | const dim_t numDOF = getNumDOF(); | const dim_t numDOF = getNumDOF(); |
1446 | out.requireWrite(); | out.requireWrite(); |
# | Line 1274 void Rectangle::dofToNodes(escript::Data | Line 1459 void Rectangle::dofToNodes(escript::Data |
1459 | //private | //private |
1460 | void Rectangle::populateSampleIds() | void Rectangle::populateSampleIds() |
1461 | { | { |
1462 | // identifiers are ordered from left to right, bottom to top globablly. | // degrees of freedom are numbered from left to right, bottom to top in |
1463 | // each rank, continuing on the next rank (ranks also go left-right, | |
1464 | // bottom-top). | |
1465 | // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which | |
1466 | // helps when writing out data rank after rank. | |
1467 | ||
1468 | // build node distribution vector first. | // build node distribution vector first. |
1469 | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is |
1470 | // constant for all ranks in this implementation | |
1471 | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); |
1472 | const dim_t numDOF=getNumDOF(); | const dim_t numDOF=getNumDOF(); |
1473 | for (dim_t k=1; k<m_mpiInfo->size; k++) { | for (dim_t k=1; k<m_mpiInfo->size; k++) { |
# | Line 1287 void Rectangle::populateSampleIds() | Line 1477 void Rectangle::populateSampleIds() |
1477 | m_nodeId.resize(getNumNodes()); | m_nodeId.resize(getNumNodes()); |
1478 | m_dofId.resize(numDOF); | m_dofId.resize(numDOF); |
1479 | m_elementId.resize(getNumElements()); | m_elementId.resize(getNumElements()); |
1480 | ||
1481 | // populate face element counts | |
1482 | //left | |
1483 | if (m_offset[0]==0) | |
1484 | m_faceCount[0]=m_NE[1]; | |
1485 | else | |
1486 | m_faceCount[0]=0; | |
1487 | //right | |
1488 | if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1) | |
1489 | m_faceCount[1]=m_NE[1]; | |
1490 | else | |
1491 | m_faceCount[1]=0; | |
1492 | //bottom | |
1493 | if (m_offset[1]==0) | |
1494 | m_faceCount[2]=m_NE[0]; | |
1495 | else | |
1496 | m_faceCount[2]=0; | |
1497 | //top | |
1498 | if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1) | |
1499 | m_faceCount[3]=m_NE[0]; | |
1500 | else | |
1501 | m_faceCount[3]=0; | |
1502 | ||
1503 | m_faceId.resize(getNumFaceElements()); | m_faceId.resize(getNumFaceElements()); |
1504 | ||
1505 | const index_t left = (m_offset[0]==0 ? 0 : 1); | |
1506 | const index_t bottom = (m_offset[1]==0 ? 0 : 1); | |
1507 | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; | |
1508 | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; | |
1509 | ||
1510 | #define globalNodeId(x,y) \ | |
1511 | ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \ | |
1512 | + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0 | |
1513 | ||
1514 | // set corner id's outside the parallel region | |
1515 | m_nodeId[0] = globalNodeId(0, 0); | |
1516 | m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0); | |
1517 | m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1); | |
1518 | m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1); | |
1519 | #undef globalNodeId | |
1520 | ||
1521 | #pragma omp parallel | #pragma omp parallel |
1522 | { | { |
1523 | // nodes | // populate degrees of freedom and own nodes (identical id) |
1524 | #pragma omp for nowait | #pragma omp for nowait |
1525 | for (dim_t i1=0; i1<m_N1; i1++) { | for (dim_t i=0; i<nDOF1; i++) { |
1526 | for (dim_t i0=0; i0<m_N0; i0++) { | for (dim_t j=0; j<nDOF0; j++) { |
1527 | m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; | const index_t nodeIdx=j+left+(i+bottom)*m_NN[0]; |
1528 | const index_t dofIdx=j+i*nDOF0; | |
1529 | m_dofId[dofIdx] = m_nodeId[nodeIdx] | |
1530 | = m_nodeDistribution[m_mpiInfo->rank]+dofIdx; | |
1531 | } | } |
1532 | } | } |
1533 | ||
1534 | // degrees of freedom | // populate the rest of the nodes (shared with other ranks) |
1535 | if (m_faceCount[0]==0) { // left column | |
1536 | #pragma omp for nowait | |
1537 | for (dim_t i=0; i<nDOF1; i++) { | |
1538 | const index_t nodeIdx=(i+bottom)*m_NN[0]; | |
1539 | const index_t dofId=(i+1)*nDOF0-1; | |
1540 | m_nodeId[nodeIdx] | |
1541 | = m_nodeDistribution[m_mpiInfo->rank-1]+dofId; | |
1542 | } | |
1543 | } | |
1544 | if (m_faceCount[1]==0) { // right column | |
1545 | #pragma omp for nowait | |
1546 | for (dim_t i=0; i<nDOF1; i++) { | |
1547 | const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1; | |
1548 | const index_t dofId=i*nDOF0; | |
1549 | m_nodeId[nodeIdx] | |
1550 | = m_nodeDistribution[m_mpiInfo->rank+1]+dofId; | |
1551 | } | |
1552 | } | |
1553 | if (m_faceCount[2]==0) { // bottom row | |
1554 | #pragma omp for nowait | |
1555 | for (dim_t i=0; i<nDOF0; i++) { | |
1556 | const index_t nodeIdx=i+left; | |
1557 | const index_t dofId=nDOF0*(nDOF1-1)+i; | |
1558 | m_nodeId[nodeIdx] | |
1559 | = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId; | |
1560 | } | |
1561 | } | |
1562 | if (m_faceCount[3]==0) { // top row | |
1563 | #pragma omp for nowait | #pragma omp for nowait |
1564 | for (dim_t k=0; k<numDOF; k++) | for (dim_t i=0; i<nDOF0; i++) { |
1565 | m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; | const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left; |
1566 | const index_t dofId=i; | |
1567 | m_nodeId[nodeIdx] | |
1568 | = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId; | |
1569 | } | |
1570 | } | |
1571 | ||
1572 | // elements | // populate element id's |
1573 | #pragma omp for nowait | #pragma omp for nowait |
1574 | for (dim_t i1=0; i1<m_NE1; i1++) { | for (dim_t i1=0; i1<m_NE[1]; i1++) { |
1575 | for (dim_t i0=0; i0<m_NE0; i0++) { | for (dim_t i0=0; i0<m_NE[0]; i0++) { |
1576 | m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; | m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0; |
1577 | } | } |
1578 | } | } |
1579 | ||
# | Line 1325 void Rectangle::populateSampleIds() | Line 1590 void Rectangle::populateSampleIds() |
1590 | updateTagsInUse(Elements); | updateTagsInUse(Elements); |
1591 | ||
1592 | // generate face offset vector and set face tags | // generate face offset vector and set face tags |
const IndexVector facesPerEdge = getNumFacesPerBoundary(); | ||
1593 | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; |
1594 | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; |
1595 | m_faceOffset.assign(facesPerEdge.size(), -1); | m_faceOffset.assign(4, -1); |
1596 | m_faceTags.clear(); | m_faceTags.clear(); |
1597 | index_t offset=0; | index_t offset=0; |
1598 | for (size_t i=0; i<facesPerEdge.size(); i++) { | for (size_t i=0; i<4; i++) { |
1599 | if (facesPerEdge[i]>0) { | if (m_faceCount[i]>0) { |
1600 | m_faceOffset[i]=offset; | m_faceOffset[i]=offset; |
1601 | offset+=facesPerEdge[i]; | offset+=m_faceCount[i]; |
1602 | m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); | m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]); |
1603 | } | } |
1604 | } | } |
1605 | setTagMap("left", LEFT); | setTagMap("left", LEFT); |
# | Line 1348 void Rectangle::populateSampleIds() | Line 1612 void Rectangle::populateSampleIds() |
1612 | //private | //private |
1613 | void Rectangle::createPattern() | void Rectangle::createPattern() |
1614 | { | { |
1615 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0]; |
1616 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1]; |
1617 | const index_t left = (m_offset0==0 ? 0 : 1); | const index_t left = (m_offset[0]==0 ? 0 : 1); |
1618 | const index_t bottom = (m_offset1==0 ? 0 : 1); | const index_t bottom = (m_offset[1]==0 ? 0 : 1); |
1619 | ||
1620 | // populate node->DOF mapping with own degrees of freedom. | // populate node->DOF mapping with own degrees of freedom. |
1621 | // The rest is assigned in the loop further down | // The rest is assigned in the loop further down |
# | Line 1359 void Rectangle::createPattern() | Line 1623 void Rectangle::createPattern() |
1623 | #pragma omp parallel for | #pragma omp parallel for |
1624 | for (index_t i=bottom; i<bottom+nDOF1; i++) { | for (index_t i=bottom; i<bottom+nDOF1; i++) { |
1625 | for (index_t j=left; j<left+nDOF0; j++) { | for (index_t j=left; j<left+nDOF0; j++) { |
1626 | m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; | m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left; |
1627 | } | } |
1628 | } | } |
1629 | ||
# | Line 1371 void Rectangle::createPattern() | Line 1635 void Rectangle::createPattern() |
1635 | RankVector neighbour; | RankVector neighbour; |
1636 | IndexVector offsetInShared(1,0); | IndexVector offsetInShared(1,0); |
1637 | IndexVector sendShared, recvShared; | IndexVector sendShared, recvShared; |
1638 | int numShared=0; | int numShared=0, expectedShared=0; |
1639 | const int x=m_mpiInfo->rank%m_NX; | const int x=m_mpiInfo->rank%m_NX[0]; |
1640 | const int y=m_mpiInfo->rank/m_NX; | const int y=m_mpiInfo->rank/m_NX[0]; |
1641 | if (x > 0) | |
1642 | expectedShared += nDOF1; | |
1643 | if (x < m_NX[0] - 1) | |
1644 | expectedShared += nDOF1; | |
1645 | if (y > 0) | |
1646 | expectedShared += nDOF0; | |
1647 | if (y < m_NX[1] - 1) | |
1648 | expectedShared += nDOF0; | |
1649 | if (x > 0 && y > 0) expectedShared++; | |
1650 | if (x > 0 && y < m_NX[1] - 1) expectedShared++; | |
1651 | if (x < m_NX[0] - 1 && y > 0) expectedShared++; | |
1652 | if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++; | |
1653 | ||
1654 | vector<IndexVector> rowIndices(expectedShared); | |
1655 | ||
1656 | for (int i1=-1; i1<2; i1++) { | for (int i1=-1; i1<2; i1++) { |
1657 | for (int i0=-1; i0<2; i0++) { | for (int i0=-1; i0<2; i0++) { |
1658 | // skip this rank | // skip this rank |
# | Line 1382 void Rectangle::createPattern() | Line 1661 void Rectangle::createPattern() |
1661 | // location of neighbour rank | // location of neighbour rank |
1662 | const int nx=x+i0; | const int nx=x+i0; |
1663 | const int ny=y+i1; | const int ny=y+i1; |
1664 | if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { | if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) { |
1665 | neighbour.push_back(ny*m_NX+nx); | neighbour.push_back(ny*m_NX[0]+nx); |
1666 | if (i0==0) { | if (i0==0) { |
1667 | // sharing top or bottom edge | // sharing top or bottom edge |
1668 | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); |
1669 | const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); | const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left); |
1670 | offsetInShared.push_back(offsetInShared.back()+nDOF0); | offsetInShared.push_back(offsetInShared.back()+nDOF0); |
1671 | for (dim_t i=0; i<nDOF0; i++, numShared++) { | for (dim_t i=0; i<nDOF0; i++, numShared++) { |
1672 | sendShared.push_back(firstDOF+i); | sendShared.push_back(firstDOF+i); |
1673 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1674 | if (i>0) | if (i>0) |
1675 | colIndices[firstDOF+i-1].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared); |
1676 | colIndices[firstDOF+i].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i, numShared); |
1677 | if (i<nDOF0-1) | if (i<nDOF0-1) |
1678 | colIndices[firstDOF+i+1].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared); |
1679 | m_dofMap[firstNode+i]=numDOF+numShared; | m_dofMap[firstNode+i]=numDOF+numShared; |
1680 | } | } |
1681 | } else if (i1==0) { | } else if (i1==0) { |
1682 | // sharing left or right edge | // sharing left or right edge |
1683 | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); |
1684 | const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); | const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1); |
1685 | offsetInShared.push_back(offsetInShared.back()+nDOF1); | offsetInShared.push_back(offsetInShared.back()+nDOF1); |
1686 | for (dim_t i=0; i<nDOF1; i++, numShared++) { | for (dim_t i=0; i<nDOF1; i++, numShared++) { |
1687 | sendShared.push_back(firstDOF+i*nDOF0); | sendShared.push_back(firstDOF+i*nDOF0); |
1688 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1689 | if (i>0) | if (i>0) |
1690 | colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared); |
1691 | colIndices[firstDOF+i*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared); |
1692 | if (i<nDOF1-1) | if (i<nDOF1-1) |
1693 | colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); | doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared); |
1694 | m_dofMap[firstNode+i*m_N0]=numDOF+numShared; | m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared; |
1695 | } | } |
1696 | } else { | } else { |
1697 | // sharing a node | // sharing a node |
1698 | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); |
1699 | const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); | const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1); |
1700 | offsetInShared.push_back(offsetInShared.back()+1); | offsetInShared.push_back(offsetInShared.back()+1); |
1701 | sendShared.push_back(dof); | sendShared.push_back(dof); |
1702 | recvShared.push_back(numDOF+numShared); | recvShared.push_back(numDOF+numShared); |
1703 | colIndices[dof].push_back(numShared); | doublyLink(colIndices, rowIndices, dof, numShared); |
1704 | m_dofMap[node]=numDOF+numShared; | m_dofMap[node]=numDOF+numShared; |
1705 | ++numShared; | ++numShared; |
1706 | } | } |
1707 | } | } |
1708 | } | } |
1709 | } | } |
1710 | ||
1711 | #pragma omp parallel for | |
1712 | for (int i = 0; i < numShared; i++) { | |
1713 | std::sort(rowIndices[i].begin(), rowIndices[i].end()); | |
1714 | } | |
1715 | ||
1716 | // create connector | // create connector |
1717 | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
1718 | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
# | Line 1443 void Rectangle::createPattern() | Line 1727 void Rectangle::createPattern() |
1727 | // create main and couple blocks | // create main and couple blocks |
1728 | Paso_Pattern *mainPattern = createMainPattern(); | Paso_Pattern *mainPattern = createMainPattern(); |
1729 | Paso_Pattern *colPattern, *rowPattern; | Paso_Pattern *colPattern, *rowPattern; |
1730 | createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); | createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern); |
1731 | ||
1732 | // allocate paso distribution | // allocate paso distribution |
1733 | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
# | Line 1527 void Rectangle::addToMatrixAndRHS(Paso_S | Line 1811 void Rectangle::addToMatrixAndRHS(Paso_S |
1811 | IndexVector rowIndex; | IndexVector rowIndex; |
1812 | rowIndex.push_back(m_dofMap[firstNode]); | rowIndex.push_back(m_dofMap[firstNode]); |
1813 | rowIndex.push_back(m_dofMap[firstNode+1]); | rowIndex.push_back(m_dofMap[firstNode+1]); |
1814 | rowIndex.push_back(m_dofMap[firstNode+m_N0]); | rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]); |
1815 | rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); | rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]); |
1816 | if (addF) { | if (addF) { |
1817 | double *F_p=F.getSampleDataRW(0); | double *F_p=F.getSampleDataRW(0); |
1818 | for (index_t i=0; i<rowIndex.size(); i++) { | for (index_t i=0; i<rowIndex.size(); i++) { |
# | Line 1546 void Rectangle::addToMatrixAndRHS(Paso_S | Line 1830 void Rectangle::addToMatrixAndRHS(Paso_S |
1830 | ||
1831 | //protected | //protected |
1832 | void Rectangle::interpolateNodesOnElements(escript::Data& out, | void Rectangle::interpolateNodesOnElements(escript::Data& out, |
1833 | escript::Data& in, bool reduced) const | const escript::Data& in, |
1834 | bool reduced) const | |
1835 | { | { |
1836 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1837 | if (reduced) { | if (reduced) { |
# | Line 1559 void Rectangle::interpolateNodesOnElemen | Line 1844 void Rectangle::interpolateNodesOnElemen |
1844 | vector<double> f_10(numComp); | vector<double> f_10(numComp); |
1845 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1846 | #pragma omp for | #pragma omp for |
1847 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1848 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1849 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1850 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1851 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1852 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1853 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1854 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1855 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); |
1856 | } /* end of component loop i */ | } /* end of component loop i */ |
# | Line 1584 void Rectangle::interpolateNodesOnElemen | Line 1869 void Rectangle::interpolateNodesOnElemen |
1869 | vector<double> f_10(numComp); | vector<double> f_10(numComp); |
1870 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1871 | #pragma omp for | #pragma omp for |
1872 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1873 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1874 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double)); |
1875 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1876 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double)); |
1877 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1878 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0])); |
1879 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1880 | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i]; | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i]; |
1881 | o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i]; | o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i]; |
# | Line 1604 void Rectangle::interpolateNodesOnElemen | Line 1889 void Rectangle::interpolateNodesOnElemen |
1889 | } | } |
1890 | ||
1891 | //protected | //protected |
1892 | void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in, | void Rectangle::interpolateNodesOnFaces(escript::Data& out, |
1893 | const escript::Data& in, | |
1894 | bool reduced) const | bool reduced) const |
1895 | { | { |
1896 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1897 | if (reduced) { | if (reduced) { |
1898 | out.requireWrite(); | out.requireWrite(); |
const double c0 = 0.5; | ||
1899 | #pragma omp parallel | #pragma omp parallel |
1900 | { | { |
1901 | vector<double> f_00(numComp); | vector<double> f_00(numComp); |
# | Line 1619 void Rectangle::interpolateNodesOnFaces( | Line 1904 void Rectangle::interpolateNodesOnFaces( |
1904 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1905 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1906 | #pragma omp for nowait | #pragma omp for nowait |
1907 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1908 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1909 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1910 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1911 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1912 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); | o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2; |
1913 | } /* end of component loop i */ | } /* end of component loop i */ |
1914 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1915 | } /* end of face 0 */ | } /* end of face 0 */ |
1916 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1917 | #pragma omp for nowait | #pragma omp for nowait |
1918 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1919 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1920 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1921 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1922 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1923 | o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2; |
1924 | } /* end of component loop i */ | } /* end of component loop i */ |
1925 | } /* end of k1 loop */ | } /* end of k1 loop */ |
1926 | } /* end of face 1 */ | } /* end of face 1 */ |
1927 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1928 | #pragma omp for nowait | #pragma omp for nowait |
1929 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1930 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1931 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1932 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1933 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1934 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); | o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2; |
1935 | } /* end of component loop i */ | } /* end of component loop i */ |
1936 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1937 | } /* end of face 2 */ | } /* end of face 2 */ |
1938 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1939 | #pragma omp for nowait | #pragma omp for nowait |
1940 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1941 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1942 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
1943 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1944 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1945 | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2; |
1946 | } /* end of component loop i */ | } /* end of component loop i */ |
1947 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1948 | } /* end of face 3 */ | } /* end of face 3 */ |
# | Line 1674 void Rectangle::interpolateNodesOnFaces( | Line 1959 void Rectangle::interpolateNodesOnFaces( |
1959 | vector<double> f_11(numComp); | vector<double> f_11(numComp); |
1960 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1961 | #pragma omp for nowait | #pragma omp for nowait |
1962 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1963 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double)); |
1964 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double)); |
1965 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1966 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1967 | o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i]; | o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i]; |
# | Line 1686 void Rectangle::interpolateNodesOnFaces( | Line 1971 void Rectangle::interpolateNodesOnFaces( |
1971 | } /* end of face 0 */ | } /* end of face 0 */ |
1972 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1973 | #pragma omp for nowait | #pragma omp for nowait |
1974 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE[1]; ++k1) { |
1975 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double)); |
1976 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double)); |
1977 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1978 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1979 | o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i]; | o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i]; |
# | Line 1698 void Rectangle::interpolateNodesOnFaces( | Line 1983 void Rectangle::interpolateNodesOnFaces( |
1983 | } /* end of face 1 */ | } /* end of face 1 */ |
1984 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1985 | #pragma omp for nowait | #pragma omp for nowait |
1986 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1987 | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double)); |
1988 | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_N0)), numComp*sizeof(double)); | memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double)); |
1989 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1990 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1991 | o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i]; | o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i]; |
# | Line 1710 void Rectangle::interpolateNodesOnFaces( | Line 1995 void Rectangle::interpolateNodesOnFaces( |
1995 | } /* end of face 2 */ | } /* end of face 2 */ |
1996 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1997 | #pragma omp for nowait | #pragma omp for nowait |
1998 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE[0]; ++k0) { |
1999 | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
2000 | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)), numComp*sizeof(double)); | memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double)); |
2001 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
2002 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
2003 | o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i]; | o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i]; |
# | Line 1724 void Rectangle::interpolateNodesOnFaces( | Line 2009 void Rectangle::interpolateNodesOnFaces( |
2009 | } | } |
2010 | } | } |
2011 | ||
//protected | ||
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
const double w0 = -0.1555021169820365539*h1/h0; | ||
const double w1 = 0.041666666666666666667; | ||
const double w2 = -0.15550211698203655390; | ||
const double w3 = 0.041666666666666666667*h0/h1; | ||
const double w4 = 0.15550211698203655390; | ||
const double w5 = -0.041666666666666666667; | ||
const double w6 = -0.01116454968463011277*h1/h0; | ||
const double w7 = 0.011164549684630112770; | ||
const double w8 = -0.011164549684630112770; | ||
const double w9 = -0.041666666666666666667*h1/h0; | ||
const double w10 = -0.041666666666666666667*h0/h1; | ||
const double w11 = 0.1555021169820365539*h1/h0; | ||
const double w12 = 0.1555021169820365539*h0/h1; | ||
const double w13 = 0.01116454968463011277*h0/h1; | ||
const double w14 = 0.01116454968463011277*h1/h0; | ||
const double w15 = 0.041666666666666666667*h1/h0; | ||
const double w16 = -0.01116454968463011277*h0/h1; | ||
const double w17 = -0.1555021169820365539*h0/h1; | ||
const double w18 = -0.33333333333333333333*h1/h0; | ||
const double w19 = 0.25; | ||
const double w20 = -0.25; | ||
const double w21 = 0.16666666666666666667*h0/h1; | ||
const double w22 = -0.16666666666666666667*h1/h0; | ||
const double w23 = -0.16666666666666666667*h0/h1; | ||
const double w24 = 0.33333333333333333333*h1/h0; | ||
const double w25 = 0.33333333333333333333*h0/h1; | ||
const double w26 = 0.16666666666666666667*h1/h0; | ||
const double w27 = -0.33333333333333333333*h0/h1; | ||
const double w28 = -0.032861463941450536761*h1; | ||
const double w29 = -0.032861463941450536761*h0; | ||
const double w30 = -0.12264065304058601714*h1; | ||
const double w31 = -0.0023593469594139828636*h1; | ||
const double w32 = -0.008805202725216129906*h0; | ||
const double w33 = -0.008805202725216129906*h1; | ||
const double w34 = 0.032861463941450536761*h1; | ||
const double w35 = 0.008805202725216129906*h1; | ||
const double w36 = 0.008805202725216129906*h0; | ||
const double w37 = 0.0023593469594139828636*h1; | ||
const double w38 = 0.12264065304058601714*h1; | ||
const double w39 = 0.032861463941450536761*h0; | ||
const double w40 = -0.12264065304058601714*h0; | ||
const double w41 = -0.0023593469594139828636*h0; | ||
const double w42 = 0.0023593469594139828636*h0; | ||
const double w43 = 0.12264065304058601714*h0; | ||
const double w44 = -0.16666666666666666667*h1; | ||
const double w45 = -0.083333333333333333333*h0; | ||
const double w46 = 0.083333333333333333333*h1; | ||
const double w47 = 0.16666666666666666667*h1; | ||
const double w48 = 0.083333333333333333333*h0; | ||
const double w49 = -0.16666666666666666667*h0; | ||
const double w50 = 0.16666666666666666667*h0; | ||
const double w51 = -0.083333333333333333333*h1; | ||
const double w52 = 0.025917019497006092316*h0*h1; | ||
const double w53 = 0.0018607582807716854616*h0*h1; | ||
const double w54 = 0.0069444444444444444444*h0*h1; | ||
const double w55 = 0.09672363354357992482*h0*h1; | ||
const double w56 = 0.00049858867864229740201*h0*h1; | ||
const double w57 = 0.055555555555555555556*h0*h1; | ||
const double w58 = 0.027777777777777777778*h0*h1; | ||
const double w59 = 0.11111111111111111111*h0*h1; | ||
const double w60 = -0.19716878364870322056*h1; | ||
const double w61 = -0.19716878364870322056*h0; | ||
const double w62 = -0.052831216351296779436*h0; | ||
const double w63 = -0.052831216351296779436*h1; | ||
const double w64 = 0.19716878364870322056*h1; | ||
const double w65 = 0.052831216351296779436*h1; | ||
const double w66 = 0.19716878364870322056*h0; | ||
const double w67 = 0.052831216351296779436*h0; | ||
const double w68 = -0.5*h1; | ||
const double w69 = -0.5*h0; | ||
const double w70 = 0.5*h1; | ||
const double w71 = 0.5*h0; | ||
const double w72 = 0.1555021169820365539*h0*h1; | ||
const double w73 = 0.041666666666666666667*h0*h1; | ||
const double w74 = 0.01116454968463011277*h0*h1; | ||
const double w75 = 0.25*h0*h1; | ||
2012 | ||
rhs.requireWrite(); | ||
#pragma omp parallel | ||
{ | ||
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | ||
#pragma omp for | ||
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | ||
for (index_t k0=0; k0<m_NE0; ++k0) { | ||
bool add_EM_S=false; | ||
bool add_EM_F=false; | ||
vector<double> EM_S(4*4, 0); | ||
vector<double> EM_F(4, 0); | ||
const index_t e = k0 + m_NE0*k1; | ||
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
if (A.actsExpanded()) { | ||
const double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; | ||
const double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; | ||
const double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; | ||
const double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; | ||
const double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; | ||
const double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; | ||
const double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; | ||
const double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; | ||
const double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; | ||
const double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; | ||
const double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; | ||
const double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; | ||
const double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; | ||
const double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; | ||
const double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; | ||
const double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; | ||
const double tmp0_0 = A_01_0 + A_01_3; | ||
const double tmp1_0 = A_00_0 + A_00_1; | ||
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | ||
const double tmp3_0 = A_00_2 + A_00_3; | ||
const double tmp4_0 = A_10_1 + A_10_2; | ||
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | ||
const double tmp6_0 = A_01_3 + A_10_0; | ||
const double tmp7_0 = A_01_0 + A_10_3; | ||
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | ||
const double tmp9_0 = A_01_0 + A_10_0; | ||
const double tmp12_0 = A_11_0 + A_11_2; | ||
const double tmp10_0 = A_01_3 + A_10_3; | ||
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | ||
const double tmp13_0 = A_01_2 + A_10_1; | ||
const double tmp11_0 = A_11_1 + A_11_3; | ||
const double tmp18_0 = A_01_1 + A_10_1; | ||
const double tmp15_0 = A_01_1 + A_10_2; | ||
const double tmp16_0 = A_10_0 + A_10_3; | ||
const double tmp17_0 = A_01_1 + A_01_2; | ||
const double tmp19_0 = A_01_2 + A_10_2; | ||
const double tmp0_1 = A_10_3*w8; | ||
const double tmp1_1 = tmp0_0*w1; | ||
const double tmp2_1 = A_01_1*w4; | ||
const double tmp3_1 = tmp1_0*w0; | ||
const double tmp4_1 = A_01_2*w7; | ||
const double tmp5_1 = tmp2_0*w3; | ||
const double tmp6_1 = tmp3_0*w6; | ||
const double tmp7_1 = A_10_0*w2; | ||
const double tmp8_1 = tmp4_0*w5; | ||
const double tmp9_1 = tmp2_0*w10; | ||
const double tmp14_1 = A_10_0*w8; | ||
const double tmp23_1 = tmp3_0*w14; | ||
const double tmp35_1 = A_01_0*w8; | ||
const double tmp54_1 = tmp13_0*w8; | ||
const double tmp20_1 = tmp9_0*w4; | ||
const double tmp25_1 = tmp12_0*w12; | ||
const double tmp44_1 = tmp7_0*w7; | ||
const double tmp26_1 = tmp10_0*w4; | ||
const double tmp52_1 = tmp18_0*w8; | ||
const double tmp48_1 = A_10_1*w7; | ||
const double tmp46_1 = A_01_3*w8; | ||
const double tmp50_1 = A_01_0*w2; | ||
const double tmp56_1 = tmp19_0*w8; | ||
const double tmp19_1 = A_10_3*w2; | ||
const double tmp47_1 = A_10_2*w4; | ||
const double tmp16_1 = tmp3_0*w0; | ||
const double tmp18_1 = tmp1_0*w6; | ||
const double tmp31_1 = tmp11_0*w12; | ||
const double tmp55_1 = tmp15_0*w2; | ||
const double tmp39_1 = A_10_2*w7; | ||
const double tmp11_1 = tmp6_0*w7; | ||
const double tmp40_1 = tmp11_0*w17; | ||
const double tmp34_1 = tmp15_0*w8; | ||
const double tmp33_1 = tmp14_0*w5; | ||
const double tmp24_1 = tmp11_0*w13; | ||
const double tmp43_1 = tmp17_0*w5; | ||
const double tmp15_1 = A_01_2*w4; | ||
const double tmp53_1 = tmp19_0*w2; | ||
const double tmp27_1 = tmp3_0*w11; | ||
const double tmp32_1 = tmp13_0*w2; | ||
const double tmp10_1 = tmp5_0*w9; | ||
const double tmp37_1 = A_10_1*w4; | ||
const double tmp38_1 = tmp5_0*w15; | ||
const double tmp17_1 = A_01_1*w7; | ||
const double tmp12_1 = tmp7_0*w4; | ||
const double tmp22_1 = tmp10_0*w7; | ||
const double tmp57_1 = tmp18_0*w2; | ||
const double tmp28_1 = tmp9_0*w7; | ||
const double tmp29_1 = tmp1_0*w14; | ||
const double tmp51_1 = tmp11_0*w16; | ||
const double tmp42_1 = tmp12_0*w16; | ||
const double tmp49_1 = tmp12_0*w17; | ||
const double tmp21_1 = tmp1_0*w11; | ||
const double tmp45_1 = tmp6_0*w4; | ||
const double tmp13_1 = tmp8_0*w1; | ||
const double tmp36_1 = tmp16_0*w1; | ||
const double tmp41_1 = A_01_3*w2; | ||
const double tmp30_1 = tmp12_0*w13; | ||
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double A_00 = A_p[INDEX2(0,0,2)]; | ||
const double A_10 = A_p[INDEX2(1,0,2)]; | ||
const double A_01 = A_p[INDEX2(0,1,2)]; | ||
const double A_11 = A_p[INDEX2(1,1,2)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_00*w18; | ||
const double tmp1_1 = A_01*w19; | ||
const double tmp2_1 = A_10*w20; | ||
const double tmp3_1 = A_11*w21; | ||
const double tmp4_1 = A_00*w22; | ||
const double tmp5_1 = tmp0_0*w19; | ||
const double tmp6_1 = A_11*w23; | ||
const double tmp7_1 = A_11*w25; | ||
const double tmp8_1 = A_00*w24; | ||
const double tmp9_1 = tmp0_0*w20; | ||
const double tmp10_1 = A_01*w20; | ||
const double tmp11_1 = A_11*w27; | ||
const double tmp12_1 = A_00*w26; | ||
const double tmp13_1 = A_10*w19; | ||
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
} | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
if (B.actsExpanded()) { | ||
const double B_0_0 = B_p[INDEX2(0,0,2)]; | ||
const double B_1_0 = B_p[INDEX2(1,0,2)]; | ||
const double B_0_1 = B_p[INDEX2(0,1,2)]; | ||
const double B_1_1 = B_p[INDEX2(1,1,2)]; | ||
const double B_0_2 = B_p[INDEX2(0,2,2)]; | ||
const double B_1_2 = B_p[INDEX2(1,2,2)]; | ||
const double B_0_3 = B_p[INDEX2(0,3,2)]; | ||
const double B_1_3 = B_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = B_1_0 + B_1_1; | ||
const double tmp1_0 = B_1_2 + B_1_3; | ||
const double tmp2_0 = B_0_1 + B_0_3; | ||
const double tmp3_0 = B_0_0 + B_0_2; | ||
const double tmp63_1 = B_1_1*w42; | ||
const double tmp79_1 = B_1_1*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp8_1 = tmp0_0*w32; | ||
const double tmp71_1 = B_0_1*w34; | ||
const double tmp19_1 = B_0_3*w31; | ||
const double tmp15_1 = B_0_3*w34; | ||
const double tmp9_1 = tmp3_0*w34; | ||
const double tmp35_1 = B_1_0*w36; | ||
const double tmp66_1 = B_0_3*w28; | ||
const double tmp28_1 = B_1_0*w42; | ||
const double tmp22_1 = B_1_0*w40; | ||
const double tmp16_1 = B_1_2*w29; | ||
const double tmp6_1 = tmp2_0*w35; | ||
const double tmp55_1 = B_1_3*w40; | ||
const double tmp50_1 = B_1_3*w42; | ||
const double tmp7_1 = tmp1_0*w29; | ||
const double tmp1_1 = tmp1_0*w32; | ||
const double tmp57_1 = B_0_3*w30; | ||
const double tmp18_1 = B_1_1*w32; | ||
const double tmp53_1 = B_1_0*w41; | ||
const double tmp61_1 = B_1_3*w36; | ||
const double tmp27_1 = B_0_3*w38; | ||
const double tmp64_1 = B_0_2*w30; | ||
const double tmp76_1 = B_0_1*w38; | ||
const double tmp39_1 = tmp2_0*w34; | ||
const double tmp62_1 = B_0_1*w31; | ||
const double tmp56_1 = B_0_0*w31; | ||
const double tmp49_1 = B_1_1*w36; | ||
const double tmp2_1 = B_0_2*w31; | ||
const double tmp23_1 = B_0_2*w33; | ||
const double tmp38_1 = B_1_1*w43; | ||
const double tmp74_1 = B_1_2*w41; | ||
const double tmp43_1 = B_1_1*w41; | ||
const double tmp58_1 = B_0_2*w28; | ||
const double tmp67_1 = B_0_0*w33; | ||
const double tmp33_1 = tmp0_0*w39; | ||
const double tmp4_1 = B_0_0*w28; | ||
const double tmp20_1 = B_0_0*w30; | ||
const double tmp13_1 = B_0_2*w38; | ||
const double tmp65_1 = B_1_2*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp41_1 = tmp3_0*w33; | ||
const double tmp73_1 = B_0_2*w37; | ||
const double tmp69_1 = B_0_0*w38; | ||
const double tmp48_1 = B_1_2*w39; | ||
const double tmp59_1 = B_0_1*w33; | ||
const double tmp17_1 = B_1_3*w41; | ||
const double tmp5_1 = B_0_3*w33; | ||
const double tmp3_1 = B_0_1*w30; | ||
const double tmp21_1 = B_0_1*w28; | ||
const double tmp42_1 = B_1_0*w29; | ||
const double tmp54_1 = B_1_2*w32; | ||
const double tmp60_1 = B_1_0*w39; | ||
const double tmp32_1 = tmp1_0*w36; | ||
const double tmp10_1 = B_0_1*w37; | ||
const double tmp14_1 = B_0_0*w35; | ||
const double tmp29_1 = B_0_1*w35; | ||
const double tmp26_1 = B_1_2*w36; | ||
const double tmp30_1 = B_1_3*w43; | ||
const double tmp70_1 = B_0_2*w35; | ||
const double tmp34_1 = B_1_3*w39; | ||
const double tmp51_1 = B_1_0*w43; | ||
const double tmp31_1 = B_0_2*w34; | ||
const double tmp45_1 = tmp3_0*w28; | ||
const double tmp11_1 = tmp1_0*w39; | ||
const double tmp52_1 = B_1_1*w29; | ||
const double tmp44_1 = B_1_3*w32; | ||
const double tmp25_1 = B_1_1*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp72_1 = B_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w28; | ||
const double tmp46_1 = B_1_2*w40; | ||
const double tmp36_1 = B_1_2*w42; | ||
const double tmp24_1 = B_0_0*w37; | ||
const double tmp77_1 = B_0_3*w35; | ||
const double tmp68_1 = B_0_3*w37; | ||
const double tmp78_1 = B_0_0*w34; | ||
const double tmp12_1 = tmp0_0*w36; | ||
const double tmp75_1 = B_1_0*w32; | ||
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double B_0 = B_p[0]; | ||
const double B_1 = B_p[1]; | ||
const double tmp0_1 = B_0*w44; | ||
const double tmp1_1 = B_1*w45; | ||
const double tmp2_1 = B_0*w46; | ||
const double tmp3_1 = B_0*w47; | ||
const double tmp4_1 = B_1*w48; | ||
const double tmp5_1 = B_1*w49; | ||
const double tmp6_1 = B_1*w50; | ||
const double tmp7_1 = B_0*w51; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; | ||
} | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
if (C.actsExpanded()) { | ||
const double C_0_0 = C_p[INDEX2(0,0,2)]; | ||
const double C_1_0 = C_p[INDEX2(1,0,2)]; | ||
const double C_0_1 = C_p[INDEX2(0,1,2)]; | ||
const double C_1_1 = C_p[INDEX2(1,1,2)]; | ||
const double C_0_2 = C_p[INDEX2(0,2,2)]; | ||
const double C_1_2 = C_p[INDEX2(1,2,2)]; | ||
const double C_0_3 = C_p[INDEX2(0,3,2)]; | ||
const double C_1_3 = C_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = C_1_0 + C_1_1; | ||
const double tmp1_0 = C_1_2 + C_1_3; | ||
const double tmp2_0 = C_0_1 + C_0_3; | ||
const double tmp3_0 = C_0_0 + C_0_2; | ||
const double tmp64_1 = C_0_2*w30; | ||
const double tmp14_1 = C_0_2*w28; | ||
const double tmp19_1 = C_0_3*w31; | ||
const double tmp22_1 = C_1_0*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp29_1 = C_0_1*w35; | ||
const double tmp73_1 = C_0_2*w37; | ||
const double tmp74_1 = C_1_2*w41; | ||
const double tmp52_1 = C_1_3*w39; | ||
const double tmp25_1 = C_1_1*w39; | ||
const double tmp62_1 = C_0_1*w31; | ||
const double tmp79_1 = C_1_1*w40; | ||
const double tmp43_1 = C_1_1*w36; | ||
const double tmp27_1 = C_0_3*w38; | ||
const double tmp28_1 = C_1_0*w42; | ||
const double tmp63_1 = C_1_1*w42; | ||
const double tmp59_1 = C_0_3*w34; | ||
const double tmp72_1 = C_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w35; | ||
const double tmp13_1 = C_0_3*w30; | ||
const double tmp51_1 = C_1_2*w40; | ||
const double tmp54_1 = C_1_2*w42; | ||
const double tmp12_1 = C_0_0*w31; | ||
const double tmp2_1 = tmp1_0*w32; | ||
const double tmp68_1 = C_0_2*w31; | ||
const double tmp75_1 = C_1_0*w32; | ||
const double tmp49_1 = C_1_1*w41; | ||
const double tmp4_1 = C_0_2*w35; | ||
const double tmp66_1 = C_0_3*w28; | ||
const double tmp56_1 = C_0_1*w37; | ||
const double tmp5_1 = C_0_1*w34; | ||
const double tmp38_1 = tmp2_0*w34; | ||
const double tmp76_1 = C_0_1*w38; | ||
const double tmp21_1 = C_0_1*w28; | ||
const double tmp69_1 = C_0_1*w30; | ||
const double tmp53_1 = C_1_0*w36; | ||
const double tmp42_1 = C_1_2*w39; | ||
const double tmp32_1 = tmp1_0*w29; | ||
const double tmp45_1 = C_1_0*w43; | ||
const double tmp33_1 = tmp0_0*w32; | ||
const double tmp35_1 = C_1_0*w41; | ||
const double tmp26_1 = C_1_2*w36; | ||
const double tmp67_1 = C_0_0*w33; | ||
const double tmp31_1 = C_0_2*w34; | ||
const double tmp20_1 = C_0_0*w30; | ||
const double tmp70_1 = C_0_0*w28; | ||
const double tmp8_1 = tmp0_0*w39; | ||
const double tmp30_1 = C_1_3*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp17_1 = C_1_3*w41; | ||
const double tmp58_1 = C_0_0*w35; | ||
const double tmp9_1 = tmp3_0*w33; | ||
const double tmp61_1 = C_1_3*w36; | ||
const double tmp41_1 = tmp3_0*w34; | ||
const double tmp50_1 = C_1_3*w32; | ||
const double tmp18_1 = C_1_1*w32; | ||
const double tmp6_1 = tmp1_0*w36; | ||
const double tmp3_1 = C_0_0*w38; | ||
const double tmp34_1 = C_1_1*w29; | ||
const double tmp77_1 = C_0_3*w35; | ||
const double tmp65_1 = C_1_2*w43; | ||
const double tmp71_1 = C_0_3*w33; | ||
const double tmp55_1 = C_1_1*w43; | ||
const double tmp46_1 = tmp3_0*w28; | ||
const double tmp24_1 = C_0_0*w37; | ||
const double tmp10_1 = tmp1_0*w39; | ||
const double tmp48_1 = C_1_0*w29; | ||
const double tmp15_1 = C_0_1*w33; | ||
const double tmp36_1 = C_1_2*w32; | ||
const double tmp60_1 = C_1_0*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp16_1 = C_1_2*w29; | ||
const double tmp1_1 = C_0_3*w37; | ||
const double tmp7_1 = tmp2_0*w28; | ||
const double tmp39_1 = C_1_3*w40; | ||
const double tmp44_1 = C_1_3*w42; | ||
const double tmp57_1 = C_0_2*w38; | ||
const double tmp78_1 = C_0_0*w34; | ||
const double tmp11_1 = tmp0_0*w36; | ||
const double tmp23_1 = C_0_2*w33; | ||
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} else { // constant data | ||
const double C_0 = C_p[0]; | ||
const double C_1 = C_p[1]; | ||
const double tmp0_1 = C_0*w47; | ||
const double tmp1_1 = C_1*w45; | ||
const double tmp2_1 = C_1*w48; | ||
const double tmp3_1 = C_0*w51; | ||
const double tmp4_1 = C_0*w44; | ||
const double tmp5_1 = C_1*w49; | ||
const double tmp6_1 = C_1*w50; | ||
const double tmp7_1 = C_0*w46; | ||
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; | ||
} | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
if (D.actsExpanded()) { | ||
const double D_0 = D_p[0]; | ||
const double D_1 = D_p[1]; | ||
const double D_2 = D_p[2]; | ||
const double D_3 = D_p[3]; | ||
const double tmp0_0 = D_0 + D_1; | ||
const double tmp1_0 = D_2 + D_3; | ||
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; | ||
const double tmp3_0 = D_1 + D_2; | ||
const double tmp4_0 = D_1 + D_3; | ||
const double tmp5_0 = D_0 + D_2; | ||
const double tmp6_0 = D_0 + D_3; | ||
const double tmp0_1 = tmp0_0*w52; | ||
const double tmp1_1 = tmp1_0*w53; | ||
const double tmp2_1 = tmp2_0*w54; | ||
const double tmp3_1 = tmp1_0*w52; | ||
const double tmp4_1 = tmp0_0*w53; | ||
const double tmp5_1 = tmp3_0*w54; | ||
const double tmp6_1 = D_0*w55; | ||
const double tmp7_1 = D_3*w56; | ||
const double tmp8_1 = D_3*w55; | ||
const double tmp9_1 = D_0*w56; | ||
const double tmp10_1 = tmp4_0*w52; | ||
const double tmp11_1 = tmp5_0*w53; | ||
const double tmp12_1 = tmp5_0*w52; | ||
const double tmp13_1 = tmp4_0*w53; | ||
const double tmp14_1 = tmp6_0*w54; | ||
const double tmp15_1 = D_2*w55; | ||
const double tmp16_1 = D_1*w56; | ||
const double tmp17_1 = D_1*w55; | ||
const double tmp18_1 = D_2*w56; | ||
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp2_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp2_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | ||
} else { // constant data | ||
const double tmp0_1 = D_p[0]*w57; | ||
const double tmp1_1 = D_p[0]*w58; | ||
const double tmp2_1 = D_p[0]*w59; | ||
EM_S[INDEX2(0,0,4)]+=tmp2_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp2_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp2_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp2_1; | ||
} | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
if (X.actsExpanded()) { | ||
const double X_0_0 = X_p[INDEX2(0,0,2)]; | ||
const double X_1_0 = X_p[INDEX2(1,0,2)]; | ||
const double X_0_1 = X_p[INDEX2(0,1,2)]; | ||
const double X_1_1 = X_p[INDEX2(1,1,2)]; | ||
const double X_0_2 = X_p[INDEX2(0,2,2)]; | ||
const double X_1_2 = X_p[INDEX2(1,2,2)]; | ||
const double X_0_3 = X_p[INDEX2(0,3,2)]; | ||
const double X_1_3 = X_p[INDEX2(1,3,2)]; | ||
const double tmp0_0 = X_0_2 + X_0_3; | ||
const double tmp1_0 = X_1_1 + X_1_3; | ||
const double tmp2_0 = X_1_0 + X_1_2; | ||
const double tmp3_0 = X_0_0 + X_0_1; | ||
const double tmp0_1 = tmp0_0*w63; | ||
const double tmp1_1 = tmp1_0*w62; | ||
const double tmp2_1 = tmp2_0*w61; | ||
const double tmp3_1 = tmp3_0*w60; | ||
const double tmp4_1 = tmp0_0*w65; | ||
const double tmp5_1 = tmp3_0*w64; | ||
const double tmp6_1 = tmp2_0*w62; | ||
const double tmp7_1 = tmp1_0*w61; | ||
const double tmp8_1 = tmp2_0*w66; | ||
const double tmp9_1 = tmp3_0*w63; | ||
const double tmp10_1 = tmp0_0*w60; | ||
const double tmp11_1 = tmp1_0*w67; | ||
const double tmp12_1 = tmp1_0*w66; | ||
const double tmp13_1 = tmp3_0*w65; | ||
const double tmp14_1 = tmp0_0*w64; | ||
const double tmp15_1 = tmp2_0*w67; | ||
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; | ||
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
} else { // constant data | ||
const double tmp0_1 = X_p[1]*w69; | ||
const double tmp1_1 = X_p[0]*w68; | ||
const double tmp2_1 = X_p[0]*w70; | ||
const double tmp3_1 = X_p[1]*w71; | ||
EM_F[0]+=tmp0_1 + tmp1_1; | ||
EM_F[1]+=tmp0_1 + tmp2_1; | ||
EM_F[2]+=tmp1_1 + tmp3_1; | ||
EM_F[3]+=tmp2_1 + tmp3_1; | ||
} | ||
} | ||
/////////////// | ||
// process Y // | ||
/////////////// | ||
if (!Y.isEmpty()) { | ||
add_EM_F=true; | ||
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | ||
if (Y.actsExpanded()) { | ||
const double Y_0 = Y_p[0]; | ||
const double Y_1 = Y_p[1]; | ||
const double Y_2 = Y_p[2]; | ||
const double Y_3 = Y_p[3]; | ||
const double tmp0_0 = Y_1 + Y_2; | ||
const double tmp1_0 = Y_0 + Y_3; | ||
const double tmp0_1 = Y_0*w72; | ||
const double tmp1_1 = tmp0_0*w73; | ||
const double tmp2_1 = Y_3*w74; | ||
const double tmp3_1 = Y_1*w72; | ||
const double tmp4_1 = tmp1_0*w73; | ||
const double tmp5_1 = Y_2*w74; | ||
const double tmp6_1 = Y_2*w72; | ||
const double tmp7_1 = Y_1*w74; | ||
const double tmp8_1 = Y_3*w72; | ||
const double tmp9_1 = Y_0*w74; | ||
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; | ||
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; | ||
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; | ||
} else { // constant data | ||
const double tmp0_1 = Y_p[0]*w75; | ||
EM_F[0]+=tmp0_1; | ||
EM_F[1]+=tmp0_1; | ||
EM_F[2]+=tmp0_1; | ||
EM_F[3]+=tmp0_1; | ||
} | ||
} | ||
2013 | ||
2014 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | namespace |
2015 | const index_t firstNode=m_N0*k1+k0; | { |
2016 | addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); | // Calculates a guassian blur colvolution matrix for 2D |
2017 | } // end k0 loop | // See wiki article on the subject |
2018 | } // end k1 loop | double* get2DGauss(unsigned radius, double sigma) |
2019 | } // end of colouring | { |
2020 | } // end of parallel region | double* arr=new double[(radius*2+1)*(radius*2+1)]; |
2021 | double common=M_1_PI*0.5*1/(sigma*sigma); | |
2022 | double total=0; | |
2023 | int r=static_cast<int>(radius); | |
2024 | for (int y=-r;y<=r;++y) | |
2025 | { | |
2026 | for (int x=-r;x<=r;++x) | |
2027 | { | |
2028 | arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma)); | |
2029 | total+=arr[(x+r)+(y+r)*(r*2+1)]; | |
2030 | } | |
2031 | } | |
2032 | double invtotal=1/total; | |
2033 | for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p) | |
2034 | { | |
2035 | arr[p]*=invtotal; | |
2036 | } | |
2037 | return arr; | |
2038 | } | |
2039 | ||
2040 | // applies conv to source to get a point. | |
2041 | // (xp, yp) are the coords in the source matrix not the destination matrix | |
2042 | double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width) | |
2043 | { | |
2044 | size_t bx=xp-radius, by=yp-radius; | |
2045 | size_t sbase=bx+by*width; | |
2046 | double result=0; | |
2047 | for (int y=0;y<2*radius+1;++y) | |
2048 | { | |
2049 | for (int x=0;x<2*radius+1;++x) | |
2050 | { | |
2051 | result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width]; | |
2052 | } | |
2053 | } | |
2054 | return result; | |
2055 | } | |
2056 | } | } |
2057 | ||
//protected | ||
void Rectangle::assemblePDESingleReduced(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
const double w0 = -.25*h1/h0; | ||
const double w1 = .25; | ||
const double w2 = -.25; | ||
const double w3 = .25*h0/h1; | ||
const double w4 = -.25*h0/h1; | ||
const double w5 = .25*h1/h0; | ||
const double w6 = -.125*h1; | ||
const double w7 = -.125*h0; | ||
const double w8 = .125*h1; | ||
const double w9 = .125*h0; | ||
const double w10 = .0625*h0*h1; | ||
const double w11 = -.5*h1; | ||
const double w12 = -.5*h0; | ||
const double w13 = .5*h1; | ||
const double w14 = .5*h0; | ||
const double w15 = .25*h0*h1; | ||
2058 | ||
2059 | rhs.requireWrite(); | /* This is a wrapper for filtered (and non-filtered) randoms |
2060 | #pragma omp parallel | * For detailed doco see randomFillWorker |
2061 | */ | |
2062 | escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape, | |
2063 | const escript::FunctionSpace& what, | |
2064 | long seed, const boost::python::tuple& filter) const | |
2065 | { | |
2066 | int numvals=escript::DataTypes::noValues(shape); | |
2067 | if (len(filter)>0 && (numvals!=1)) | |
2068 | { | { |
2069 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | throw RipleyException("Ripley only supports filters for scalar data."); |
2070 | #pragma omp for | } |
2071 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | escript::Data res=randomFillWorker(shape, seed, filter); |
2072 | for (index_t k0=0; k0<m_NE0; ++k0) { | if (res.getFunctionSpace()!=what) |
2073 | bool add_EM_S=false; | { |
2074 | bool add_EM_F=false; | escript::Data r=escript::Data(res, what); |
2075 | vector<double> EM_S(4*4, 0); | return r; |
2076 | vector<double> EM_F(4, 0); | } |
2077 | const index_t e = k0 + m_NE0*k1; | return res; |
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
const double A_00 = A_p[INDEX2(0,0,2)]; | ||
const double A_10 = A_p[INDEX2(1,0,2)]; | ||
const double A_01 = A_p[INDEX2(0,1,2)]; | ||
const double A_11 = A_p[INDEX2(1,1,2)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_11*w3; | ||
const double tmp1_1 = A_00*w0; | ||
const double tmp2_1 = A_01*w1; | ||
const double tmp3_1 = A_10*w2; | ||
const double tmp4_1 = tmp0_0*w1; | ||
const double tmp5_1 = A_11*w4; | ||
const double tmp6_1 = A_00*w5; | ||
const double tmp7_1 = tmp0_0*w2; | ||
const double tmp8_1 = A_10*w1; | ||
const double tmp9_1 = A_01*w2; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
const double tmp2_1 = B_p[0]*w8; | ||
const double tmp0_1 = B_p[0]*w6; | ||
const double tmp3_1 = B_p[1]*w9; | ||
const double tmp1_1 = B_p[1]*w7; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp3_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp2_1 + tmp3_1; | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
const double tmp1_1 = C_p[1]*w7; | ||
const double tmp0_1 = C_p[0]*w8; | ||
const double tmp3_1 = C_p[0]*w6; | ||
const double tmp2_1 = C_p[1]*w9; | ||
EM_S[INDEX2(0,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp2_1; | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
const double tmp0_1 = D_p[0]*w10; | ||
EM_S[INDEX2(0,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,0,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,1,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1; | ||
EM_S[INDEX2(0,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(1,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(3,3,4)]+=tmp0_1; | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
const double tmp0_1 = X_p[0]*w11; | ||
const double tmp2_1 = X_p[0]*w13; | ||
const double tmp1_1 = X_p[1]*w12; | ||
const double tmp3_1 = X_p[1]*w14; | ||
EM_F[0]+=tmp0_1 + tmp1_1; | ||
EM_F[1]+=tmp1_1 + tmp2_1; | ||
EM_F[2]+=tmp0_1 + tmp3_1; | ||
EM_F[3]+=tmp2_1 + tmp3_1; | ||
} | ||
/////////////// | ||
// process Y // | ||
/////////////// | ||
if (!Y.isEmpty()) { | ||
add_EM_F=true; | ||
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | ||
const double tmp0_1 = Y_p[0]*w15; | ||
EM_F[0]+=tmp0_1; | ||
EM_F[1]+=tmp0_1; | ||
EM_F[2]+=tmp0_1; | ||
EM_F[3]+=tmp0_1; | ||
} | ||
// add to matrix (if add_EM_S) and RHS (if add_EM_F) | ||
const index_t firstNode=m_N0*k1+k0; | ||
addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); | ||
} // end k0 loop | ||
} // end k1 loop | ||
} // end of colouring | ||
} // end of parallel region | ||
2078 | } | } |
2079 | ||
//protected | ||
void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, | ||
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
dim_t numEq, numComp; | ||
if (!mat) | ||
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); | ||
else { | ||
numEq=mat->logical_row_block_size; | ||
numComp=mat->logical_col_block_size; | ||
} | ||
const double w0 = -0.1555021169820365539*h1/h0; | ||
const double w1 = 0.041666666666666666667; | ||
const double w2 = -0.15550211698203655390; | ||
const double w3 = 0.041666666666666666667*h0/h1; | ||
const double w4 = 0.15550211698203655390; | ||
const double w5 = -0.041666666666666666667; | ||
const double w6 = -0.01116454968463011277*h1/h0; | ||
const double w7 = 0.011164549684630112770; | ||
const double w8 = -0.011164549684630112770; | ||
const double w9 = -0.041666666666666666667*h1/h0; | ||
const double w10 = -0.041666666666666666667*h0/h1; | ||
const double w11 = 0.1555021169820365539*h1/h0; | ||
const double w12 = 0.1555021169820365539*h0/h1; | ||
const double w13 = 0.01116454968463011277*h0/h1; | ||
const double w14 = 0.01116454968463011277*h1/h0; | ||
const double w15 = 0.041666666666666666667*h1/h0; | ||
const double w16 = -0.01116454968463011277*h0/h1; | ||
const double w17 = -0.1555021169820365539*h0/h1; | ||
const double w18 = -0.33333333333333333333*h1/h0; | ||
const double w19 = 0.25000000000000000000; | ||
const double w20 = -0.25000000000000000000; | ||
const double w21 = 0.16666666666666666667*h0/h1; | ||
const double w22 = -0.16666666666666666667*h1/h0; | ||
const double w23 = -0.16666666666666666667*h0/h1; | ||
const double w24 = 0.33333333333333333333*h1/h0; | ||
const double w25 = 0.33333333333333333333*h0/h1; | ||
const double w26 = 0.16666666666666666667*h1/h0; | ||
const double w27 = -0.33333333333333333333*h0/h1; | ||
const double w28 = -0.032861463941450536761*h1; | ||
const double w29 = -0.032861463941450536761*h0; | ||
const double w30 = -0.12264065304058601714*h1; | ||
const double w31 = -0.0023593469594139828636*h1; | ||
const double w32 = -0.008805202725216129906*h0; | ||
const double w33 = -0.008805202725216129906*h1; | ||
const double w34 = 0.032861463941450536761*h1; | ||
const double w35 = 0.008805202725216129906*h1; | ||
const double w36 = 0.008805202725216129906*h0; | ||
const double w37 = 0.0023593469594139828636*h1; | ||
const double w38 = 0.12264065304058601714*h1; | ||
const double w39 = 0.032861463941450536761*h0; | ||
const double w40 = -0.12264065304058601714*h0; | ||
const double w41 = -0.0023593469594139828636*h0; | ||
const double w42 = 0.0023593469594139828636*h0; | ||
const double w43 = 0.12264065304058601714*h0; | ||
const double w44 = -0.16666666666666666667*h1; | ||
const double w45 = -0.083333333333333333333*h0; | ||
const double w46 = 0.083333333333333333333*h1; | ||
const double w47 = 0.16666666666666666667*h1; | ||
const double w48 = 0.083333333333333333333*h0; | ||
const double w49 = -0.16666666666666666667*h0; | ||
const double w50 = 0.16666666666666666667*h0; | ||
const double w51 = -0.083333333333333333333*h1; | ||
const double w52 = 0.025917019497006092316*h0*h1; | ||
const double w53 = 0.0018607582807716854616*h0*h1; | ||
const double w54 = 0.0069444444444444444444*h0*h1; | ||
const double w55 = 0.09672363354357992482*h0*h1; | ||
const double w56 = 0.00049858867864229740201*h0*h1; | ||
const double w57 = 0.055555555555555555556*h0*h1; | ||
const double w58 = 0.027777777777777777778*h0*h1; | ||
const double w59 = 0.11111111111111111111*h0*h1; | ||
const double w60 = -0.19716878364870322056*h1; | ||
const double w61 = -0.19716878364870322056*h0; | ||
const double w62 = -0.052831216351296779436*h0; | ||
const double w63 = -0.052831216351296779436*h1; | ||
const double w64 = 0.19716878364870322056*h1; | ||
const double w65 = 0.052831216351296779436*h1; | ||
const double w66 = 0.19716878364870322056*h0; | ||
const double w67 = 0.052831216351296779436*h0; | ||
const double w68 = -0.5*h1; | ||
const double w69 = -0.5*h0; | ||
const double w70 = 0.5*h1; | ||
const double w71 = 0.5*h0; | ||
const double w72 = 0.1555021169820365539*h0*h1; | ||
const double w73 = 0.041666666666666666667*h0*h1; | ||
const double w74 = 0.01116454968463011277*h0*h1; | ||
const double w75 = 0.25*h0*h1; | ||
2080 | ||
2081 | rhs.requireWrite(); | /* This routine produces a Data object filled with smoothed random data. |
2082 | #pragma omp parallel | The dimensions of the rectangle being filled are internal[0] x internal[1] points. |
2083 | { | A parameter radius gives the size of the stencil used for the smoothing. |
2084 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | A point on the left hand edge for example, will still require `radius` extra points to the left |
2085 | #pragma omp for | in order to complete the stencil. |
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | ||
for (index_t k0=0; k0<m_NE0; ++k0) { | ||
bool add_EM_S=false; | ||
bool add_EM_F=false; | ||
vector<double> EM_S(4*4*numEq*numComp, 0); | ||
vector<double> EM_F(4*numEq, 0); | ||
const index_t e = k0 + m_NE0*k1; | ||
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
if (A.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; | ||
const double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; | ||
const double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; | ||
const double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; | ||
const double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; | ||
const double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; | ||
const double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; | ||
const double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; | ||
const double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; | ||
const double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; | ||
const double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; | ||
const double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; | ||
const double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; | ||
const double A_01_3 = A_p[INDEX5(k,0,m,1,3, numEq,2,numComp,2)]; | ||
const double A_10_3 = A_p[INDEX5(k,1,m,0,3, numEq,2,numComp,2)]; | ||
const double A_11_3 = A_p[INDEX5(k,1,m,1,3, numEq,2,numComp,2)]; | ||
const double tmp0_0 = A_01_0 + A_01_3; | ||
const double tmp1_0 = A_00_0 + A_00_1; | ||
const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | ||
const double tmp3_0 = A_00_2 + A_00_3; | ||
const double tmp4_0 = A_10_1 + A_10_2; | ||
const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | ||
const double tmp6_0 = A_01_3 + A_10_0; | ||
const double tmp7_0 = A_01_0 + A_10_3; | ||
const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | ||
const double tmp9_0 = A_01_0 + A_10_0; | ||
const double tmp10_0 = A_01_3 + A_10_3; | ||
const double tmp11_0 = A_11_1 + A_11_3; | ||
const double tmp12_0 = A_11_0 + A_11_2; | ||
const double tmp13_0 = A_01_2 + A_10_1; | ||
const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | ||
const double tmp15_0 = A_01_1 + A_10_2; | ||
const double tmp16_0 = A_10_0 + A_10_3; | ||
const double tmp17_0 = A_01_1 + A_01_2; | ||
const double tmp18_0 = A_01_1 + A_10_1; | ||
const double tmp19_0 = A_01_2 + A_10_2; | ||
const double tmp0_1 = A_10_3*w8; | ||
const double tmp1_1 = tmp0_0*w1; | ||
const double tmp2_1 = A_01_1*w4; | ||
const double tmp3_1 = tmp1_0*w0; | ||
const double tmp4_1 = A_01_2*w7; | ||
const double tmp5_1 = tmp2_0*w3; | ||
const double tmp6_1 = tmp3_0*w6; | ||
const double tmp7_1 = A_10_0*w2; | ||
const double tmp8_1 = tmp4_0*w5; | ||
const double tmp9_1 = tmp2_0*w10; | ||
const double tmp10_1 = tmp5_0*w9; | ||
const double tmp11_1 = tmp6_0*w7; | ||
const double tmp12_1 = tmp7_0*w4; | ||
const double tmp13_1 = tmp8_0*w1; | ||
const double tmp14_1 = A_10_0*w8; | ||
const double tmp15_1 = A_01_2*w4; | ||
const double tmp16_1 = tmp3_0*w0; | ||
const double tmp17_1 = A_01_1*w7; | ||
const double tmp18_1 = tmp1_0*w6; | ||
const double tmp19_1 = A_10_3*w2; | ||
const double tmp20_1 = tmp9_0*w4; | ||
const double tmp21_1 = tmp1_0*w11; | ||
const double tmp22_1 = tmp10_0*w7; | ||
const double tmp23_1 = tmp3_0*w14; | ||
const double tmp24_1 = tmp11_0*w13; | ||
const double tmp25_1 = tmp12_0*w12; | ||
const double tmp26_1 = tmp10_0*w4; | ||
const double tmp27_1 = tmp3_0*w11; | ||
const double tmp28_1 = tmp9_0*w7; | ||
const double tmp29_1 = tmp1_0*w14; | ||
const double tmp30_1 = tmp12_0*w13; | ||
const double tmp31_1 = tmp11_0*w12; | ||
const double tmp32_1 = tmp13_0*w2; | ||
const double tmp33_1 = tmp14_0*w5; | ||
const double tmp34_1 = tmp15_0*w8; | ||
const double tmp35_1 = A_01_0*w8; | ||
const double tmp36_1 = tmp16_0*w1; | ||
const double tmp37_1 = A_10_1*w4; | ||
const double tmp38_1 = tmp5_0*w15; | ||
const double tmp39_1 = A_10_2*w7; | ||
const double tmp40_1 = tmp11_0*w17; | ||
const double tmp41_1 = A_01_3*w2; | ||
const double tmp42_1 = tmp12_0*w16; | ||
const double tmp43_1 = tmp17_0*w5; | ||
const double tmp44_1 = tmp7_0*w7; | ||
const double tmp45_1 = tmp6_0*w4; | ||
const double tmp46_1 = A_01_3*w8; | ||
const double tmp47_1 = A_10_2*w4; | ||
const double tmp48_1 = A_10_1*w7; | ||
const double tmp49_1 = tmp12_0*w17; | ||
const double tmp50_1 = A_01_0*w2; | ||
const double tmp51_1 = tmp11_0*w16; | ||
const double tmp52_1 = tmp18_0*w8; | ||
const double tmp53_1 = tmp19_0*w2; | ||
const double tmp54_1 = tmp13_0*w8; | ||
const double tmp55_1 = tmp15_0*w2; | ||
const double tmp56_1 = tmp19_0*w8; | ||
const double tmp57_1 = tmp18_0*w2; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; | ||
const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; | ||
const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; | ||
const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_00*w18; | ||
const double tmp1_1 = A_01*w19; | ||
const double tmp2_1 = A_10*w20; | ||
const double tmp3_1 = A_11*w21; | ||
const double tmp4_1 = A_00*w22; | ||
const double tmp5_1 = tmp0_0*w19; | ||
const double tmp6_1 = A_11*w23; | ||
const double tmp7_1 = A_11*w25; | ||
const double tmp8_1 = A_00*w24; | ||
const double tmp9_1 = tmp0_0*w20; | ||
const double tmp10_1 = A_01*w20; | ||
const double tmp11_1 = A_11*w27; | ||
const double tmp12_1 = A_00*w26; | ||
const double tmp13_1 = A_10*w19; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | ||
if (B.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)]; | ||
const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)]; | ||
const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)]; | ||
const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)]; | ||
const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)]; | ||
const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)]; | ||
const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)]; | ||
const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)]; | ||
const double tmp0_0 = B_1_0 + B_1_1; | ||
const double tmp1_0 = B_1_2 + B_1_3; | ||
const double tmp2_0 = B_0_1 + B_0_3; | ||
const double tmp3_0 = B_0_0 + B_0_2; | ||
const double tmp63_1 = B_1_1*w42; | ||
const double tmp79_1 = B_1_1*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp8_1 = tmp0_0*w32; | ||
const double tmp71_1 = B_0_1*w34; | ||
const double tmp19_1 = B_0_3*w31; | ||
const double tmp15_1 = B_0_3*w34; | ||
const double tmp9_1 = tmp3_0*w34; | ||
const double tmp35_1 = B_1_0*w36; | ||
const double tmp66_1 = B_0_3*w28; | ||
const double tmp28_1 = B_1_0*w42; | ||
const double tmp22_1 = B_1_0*w40; | ||
const double tmp16_1 = B_1_2*w29; | ||
const double tmp6_1 = tmp2_0*w35; | ||
const double tmp55_1 = B_1_3*w40; | ||
const double tmp50_1 = B_1_3*w42; | ||
const double tmp7_1 = tmp1_0*w29; | ||
const double tmp1_1 = tmp1_0*w32; | ||
const double tmp57_1 = B_0_3*w30; | ||
const double tmp18_1 = B_1_1*w32; | ||
const double tmp53_1 = B_1_0*w41; | ||
const double tmp61_1 = B_1_3*w36; | ||
const double tmp27_1 = B_0_3*w38; | ||
const double tmp64_1 = B_0_2*w30; | ||
const double tmp76_1 = B_0_1*w38; | ||
const double tmp39_1 = tmp2_0*w34; | ||
const double tmp62_1 = B_0_1*w31; | ||
const double tmp56_1 = B_0_0*w31; | ||
const double tmp49_1 = B_1_1*w36; | ||
const double tmp2_1 = B_0_2*w31; | ||
const double tmp23_1 = B_0_2*w33; | ||
const double tmp38_1 = B_1_1*w43; | ||
const double tmp74_1 = B_1_2*w41; | ||
const double tmp43_1 = B_1_1*w41; | ||
const double tmp58_1 = B_0_2*w28; | ||
const double tmp67_1 = B_0_0*w33; | ||
const double tmp33_1 = tmp0_0*w39; | ||
const double tmp4_1 = B_0_0*w28; | ||
const double tmp20_1 = B_0_0*w30; | ||
const double tmp13_1 = B_0_2*w38; | ||
const double tmp65_1 = B_1_2*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp41_1 = tmp3_0*w33; | ||
const double tmp73_1 = B_0_2*w37; | ||
const double tmp69_1 = B_0_0*w38; | ||
const double tmp48_1 = B_1_2*w39; | ||
const double tmp59_1 = B_0_1*w33; | ||
const double tmp17_1 = B_1_3*w41; | ||
const double tmp5_1 = B_0_3*w33; | ||
const double tmp3_1 = B_0_1*w30; | ||
const double tmp21_1 = B_0_1*w28; | ||
const double tmp42_1 = B_1_0*w29; | ||
const double tmp54_1 = B_1_2*w32; | ||
const double tmp60_1 = B_1_0*w39; | ||
const double tmp32_1 = tmp1_0*w36; | ||
const double tmp10_1 = B_0_1*w37; | ||
const double tmp14_1 = B_0_0*w35; | ||
const double tmp29_1 = B_0_1*w35; | ||
const double tmp26_1 = B_1_2*w36; | ||
const double tmp30_1 = B_1_3*w43; | ||
const double tmp70_1 = B_0_2*w35; | ||
const double tmp34_1 = B_1_3*w39; | ||
const double tmp51_1 = B_1_0*w43; | ||
const double tmp31_1 = B_0_2*w34; | ||
const double tmp45_1 = tmp3_0*w28; | ||
const double tmp11_1 = tmp1_0*w39; | ||
const double tmp52_1 = B_1_1*w29; | ||
const double tmp44_1 = B_1_3*w32; | ||
const double tmp25_1 = B_1_1*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp72_1 = B_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w28; | ||
const double tmp46_1 = B_1_2*w40; | ||
const double tmp36_1 = B_1_2*w42; | ||
const double tmp24_1 = B_0_0*w37; | ||
const double tmp77_1 = B_0_3*w35; | ||
const double tmp68_1 = B_0_3*w37; | ||
const double tmp78_1 = B_0_0*w34; | ||
const double tmp12_1 = tmp0_0*w36; | ||
const double tmp75_1 = B_1_0*w32; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; | ||
const double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; | ||
const double tmp6_1 = B_1*w50; | ||
const double tmp1_1 = B_1*w45; | ||
const double tmp5_1 = B_1*w49; | ||
const double tmp4_1 = B_1*w48; | ||
const double tmp0_1 = B_0*w44; | ||
const double tmp2_1 = B_0*w46; | ||
const double tmp7_1 = B_0*w51; | ||
const double tmp3_1 = B_0*w47; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process C // | ||
/////////////// | ||
if (!C.isEmpty()) { | ||
add_EM_S=true; | ||
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | ||
if (C.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)]; | ||
const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)]; | ||
const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)]; | ||
const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)]; | ||
const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)]; | ||
const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)]; | ||
const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)]; | ||
const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)]; | ||
const double tmp2_0 = C_0_1 + C_0_3; | ||
const double tmp1_0 = C_1_2 + C_1_3; | ||
const double tmp3_0 = C_0_0 + C_0_2; | ||
const double tmp0_0 = C_1_0 + C_1_1; | ||
const double tmp64_1 = C_0_2*w30; | ||
const double tmp14_1 = C_0_2*w28; | ||
const double tmp19_1 = C_0_3*w31; | ||
const double tmp22_1 = C_1_0*w40; | ||
const double tmp37_1 = tmp3_0*w35; | ||
const double tmp29_1 = C_0_1*w35; | ||
const double tmp73_1 = C_0_2*w37; | ||
const double tmp74_1 = C_1_2*w41; | ||
const double tmp52_1 = C_1_3*w39; | ||
const double tmp25_1 = C_1_1*w39; | ||
const double tmp62_1 = C_0_1*w31; | ||
const double tmp79_1 = C_1_1*w40; | ||
const double tmp43_1 = C_1_1*w36; | ||
const double tmp27_1 = C_0_3*w38; | ||
const double tmp28_1 = C_1_0*w42; | ||
const double tmp63_1 = C_1_1*w42; | ||
const double tmp59_1 = C_0_3*w34; | ||
const double tmp72_1 = C_1_3*w29; | ||
const double tmp40_1 = tmp2_0*w35; | ||
const double tmp13_1 = C_0_3*w30; | ||
const double tmp51_1 = C_1_2*w40; | ||
const double tmp54_1 = C_1_2*w42; | ||
const double tmp12_1 = C_0_0*w31; | ||
const double tmp2_1 = tmp1_0*w32; | ||
const double tmp68_1 = C_0_2*w31; | ||
const double tmp75_1 = C_1_0*w32; | ||
const double tmp49_1 = C_1_1*w41; | ||
const double tmp4_1 = C_0_2*w35; | ||
const double tmp66_1 = C_0_3*w28; | ||
const double tmp56_1 = C_0_1*w37; | ||
const double tmp5_1 = C_0_1*w34; | ||
const double tmp38_1 = tmp2_0*w34; | ||
const double tmp76_1 = C_0_1*w38; | ||
const double tmp21_1 = C_0_1*w28; | ||
const double tmp69_1 = C_0_1*w30; | ||
const double tmp53_1 = C_1_0*w36; | ||
const double tmp42_1 = C_1_2*w39; | ||
const double tmp32_1 = tmp1_0*w29; | ||
const double tmp45_1 = C_1_0*w43; | ||
const double tmp33_1 = tmp0_0*w32; | ||
const double tmp35_1 = C_1_0*w41; | ||
const double tmp26_1 = C_1_2*w36; | ||
const double tmp67_1 = C_0_0*w33; | ||
const double tmp31_1 = C_0_2*w34; | ||
const double tmp20_1 = C_0_0*w30; | ||
const double tmp70_1 = C_0_0*w28; | ||
const double tmp8_1 = tmp0_0*w39; | ||
const double tmp30_1 = C_1_3*w43; | ||
const double tmp0_1 = tmp0_0*w29; | ||
const double tmp17_1 = C_1_3*w41; | ||
const double tmp58_1 = C_0_0*w35; | ||
const double tmp9_1 = tmp3_0*w33; | ||
const double tmp61_1 = C_1_3*w36; | ||
const double tmp41_1 = tmp3_0*w34; | ||
const double tmp50_1 = C_1_3*w32; | ||
const double tmp18_1 = C_1_1*w32; | ||
const double tmp6_1 = tmp1_0*w36; | ||
const double tmp3_1 = C_0_0*w38; | ||
const double tmp34_1 = C_1_1*w29; | ||
const double tmp77_1 = C_0_3*w35; | ||
const double tmp65_1 = C_1_2*w43; | ||
const double tmp71_1 = C_0_3*w33; | ||
const double tmp55_1 = C_1_1*w43; | ||
const double tmp46_1 = tmp3_0*w28; | ||
const double tmp24_1 = C_0_0*w37; | ||
const double tmp10_1 = tmp1_0*w39; | ||
const double tmp48_1 = C_1_0*w29; | ||
const double tmp15_1 = C_0_1*w33; | ||
const double tmp36_1 = C_1_2*w32; | ||
const double tmp60_1 = C_1_0*w39; | ||
const double tmp47_1 = tmp2_0*w33; | ||
const double tmp16_1 = C_1_2*w29; | ||
const double tmp1_1 = C_0_3*w37; | ||
const double tmp7_1 = tmp2_0*w28; | ||
const double tmp39_1 = C_1_3*w40; | ||
const double tmp44_1 = C_1_3*w42; | ||
const double tmp57_1 = C_0_2*w38; | ||
const double tmp78_1 = C_0_0*w34; | ||
const double tmp11_1 = tmp0_0*w36; | ||
const double tmp23_1 = C_0_2*w33; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; | ||
const double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; | ||
const double tmp1_1 = C_1*w45; | ||
const double tmp3_1 = C_0*w51; | ||
const double tmp4_1 = C_0*w44; | ||
const double tmp7_1 = C_0*w46; | ||
const double tmp5_1 = C_1*w49; | ||
const double tmp2_1 = C_1*w48; | ||
const double tmp0_1 = C_0*w47; | ||
const double tmp6_1 = C_1*w50; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp4_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process D // | ||
/////////////// | ||
if (!D.isEmpty()) { | ||
add_EM_S=true; | ||
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | ||
if (D.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double D_0 = D_p[INDEX3(k, m, 0, numEq, numComp)]; | ||
const double D_1 = D_p[INDEX3(k, m, 1, numEq, numComp)]; | ||
const double D_2 = D_p[INDEX3(k, m, 2, numEq, numComp)]; | ||
const double D_3 = D_p[INDEX3(k, m, 3, numEq, numComp)]; | ||
const double tmp4_0 = D_1 + D_3; | ||
const double tmp2_0 = D_0 + D_1 + D_2 + D_3; | ||
const double tmp5_0 = D_0 + D_2; | ||
const double tmp0_0 = D_0 + D_1; | ||
const double tmp6_0 = D_0 + D_3; | ||
const double tmp1_0 = D_2 + D_3; | ||
const double tmp3_0 = D_1 + D_2; | ||
const double tmp16_1 = D_1*w56; | ||
const double tmp14_1 = tmp6_0*w54; | ||
const double tmp8_1 = D_3*w55; | ||
const double tmp2_1 = tmp2_0*w54; | ||
const double tmp12_1 = tmp5_0*w52; | ||
const double tmp4_1 = tmp0_0*w53; | ||
const double tmp3_1 = tmp1_0*w52; | ||
const double tmp13_1 = tmp4_0*w53; | ||
const double tmp10_1 = tmp4_0*w52; | ||
const double tmp1_1 = tmp1_0*w53; | ||
const double tmp7_1 = D_3*w56; | ||
const double tmp0_1 = tmp0_0*w52; | ||
const double tmp11_1 = tmp5_0*w53; | ||
const double tmp9_1 = D_0*w56; | ||
const double tmp5_1 = tmp3_0*w54; | ||
const double tmp18_1 = D_2*w56; | ||
const double tmp17_1 = D_1*w55; | ||
const double tmp6_1 = D_0*w55; | ||
const double tmp15_1 = D_2*w55; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | ||
} | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double D_0 = D_p[INDEX2(k, m, numEq)]; | ||
const double tmp2_1 = D_0*w59; | ||
const double tmp1_1 = D_0*w58; | ||
const double tmp0_1 = D_0*w57; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp2_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp2_1; | ||
} | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process X // | ||
/////////////// | ||
if (!X.isEmpty()) { | ||
add_EM_F=true; | ||
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | ||
if (X.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
const double X_0_0 = X_p[INDEX3(k, 0, 0, numEq, 2)]; | ||
const double X_1_0 = X_p[INDEX3(k, 1, 0, numEq, 2)]; | ||
const double X_0_1 = X_p[INDEX3(k, 0, 1, numEq, 2)]; | ||
const double X_1_1 = X_p[INDEX3(k, 1, 1, numEq, 2)]; | ||
const double X_0_2 = X_p[INDEX3(k, 0, 2, numEq, 2)]; | ||
const double X_1_2 = X_p[INDEX3(k, 1, 2, numEq, 2)]; | ||
const double X_0_3 = X_p[INDEX3(k, 0, 3, numEq, 2)]; | ||
const double X_1_3 = X_p[INDEX3(k, 1, 3, numEq, 2)]; | ||
const double tmp1_0 = X_1_1 + X_1_3; | ||
const double tmp3_0 = X_0_0 + X_0_1; | ||
const double tmp2_0 = X_1_0 + X_1_2; | ||
const double tmp0_0 = X_0_2 + X_0_3; | ||
const double tmp8_1 = tmp2_0*w66; | ||
const double tmp5_1 = tmp3_0*w64; | ||
const double tmp14_1 = tmp0_0*w64; | ||
const double tmp3_1 = tmp3_0*w60; | ||
const double tmp9_1 = tmp3_0*w63; | ||
const double tmp13_1 = tmp3_0*w65; | ||
const double tmp12_1 = tmp1_0*w66; | ||
const double tmp10_1 = tmp0_0*w60; | ||
const double tmp2_1 = tmp2_0*w61; | ||
const double tmp6_1 = tmp2_0*w62; | ||
const double tmp4_1 = tmp0_0*w65; | ||
const double tmp11_1 = tmp1_0*w67; | ||
const double tmp1_1 = tmp1_0*w62; | ||
const double tmp7_1 = tmp1_0*w61; | ||
const double tmp0_1 = tmp0_0*w63; | ||
const double tmp15_1 = tmp2_0*w67; | ||
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_F[INDEX2(k,1,numEq)]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; | ||
EM_F[INDEX2(k,2,numEq)]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; | ||
EM_F[INDEX2(k,3,numEq)]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
const double X_0 = X_p[INDEX2(k, 0, numEq)]; | ||
const double X_1 = X_p[INDEX2(k, 1, numEq)]; | ||
const double tmp0_1 = X_1*w69; | ||
const double tmp1_1 = X_0*w68; | ||
const double tmp2_1 = X_0*w70; | ||
const double tmp3_1 = X_1*w71; | ||
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; | ||
EM_F[INDEX2(k,1,numEq)]+=tmp0_1 + tmp2_1; | ||
EM_F[INDEX2(k,2,numEq)]+=tmp1_1 + tmp3_1; | ||
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process Y // | ||
/////////////// | ||
if (!Y.isEmpty()) { | ||
add_EM_F=true; | ||
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | ||
if (Y.actsExpanded()) { | ||
for (index_t k=0; k<numEq; k++) { | ||
const double Y_0 = Y_p[INDEX2(k, 0, numEq)]; | ||
const double Y_1 = Y_p[INDEX2(k, 1, numEq)]; | ||
const double Y_2 = Y_p[INDEX2(k, 2, numEq)]; | ||
const double Y_3 = Y_p[INDEX2(k, 3, numEq)]; | ||
const double tmp0_0 = Y_1 + Y_2; | ||
const double tmp1_0 = Y_0 + Y_3; | ||
const double tmp0_1 = Y_0*w72; | ||
const double tmp1_1 = tmp0_0*w73; | ||
const double tmp2_1 = Y_3*w74; | ||
const double tmp3_1 = Y_1*w72; | ||
const double tmp4_1 = tmp1_0*w73; | ||
const double tmp5_1 = Y_2*w74; | ||
const double tmp6_1 = Y_2*w72; | ||
const double tmp7_1 = Y_1*w74; | ||
const double tmp8_1 = Y_3*w72; | ||
const double tmp9_1 = Y_0*w74; | ||
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1; | ||
EM_F[INDEX2(k,1,numEq)]+=tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_F[INDEX2(k,2,numEq)]+=tmp4_1 + tmp6_1 + tmp7_1; | ||
EM_F[INDEX2(k,3,numEq)]+=tmp1_1 + tmp8_1 + tmp9_1; | ||
} | ||
} else { // constant data | ||
for (index_t k=0; k<numEq; k++) { | ||
const double tmp0_1 = Y_p[k]*w75; | ||
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; | ||
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; | ||
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; | ||
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; | ||
} | ||
} | ||
} | ||
2086 | ||
2087 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | All local calculation is done on an array called `src`, with |
2088 | const index_t firstNode=m_N0*k1+k0; | dimensions = ext[0] * ext[1]. |
2089 | addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, | Where ext[i]= internal[i]+2*radius. |
firstNode, numEq, numComp); | ||
} // end k0 loop | ||
} // end k1 loop | ||
} // end of colouring | ||
} // end of parallel region | ||
} | ||
2090 | ||
2091 | //protected | Now for MPI there is overlap to deal with. We need to share both the overlapping |
2092 | void Rectangle::assemblePDESystemReduced(Paso_SystemMatrix* mat, | values themselves but also the external region. |
escript::Data& rhs, const escript::Data& A, const escript::Data& B, | ||
const escript::Data& C, const escript::Data& D, | ||
const escript::Data& X, const escript::Data& Y) const | ||
{ | ||
const double h0 = m_l0/m_gNE0; | ||
const double h1 = m_l1/m_gNE1; | ||
dim_t numEq, numComp; | ||
if (!mat) | ||
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); | ||
else { | ||
numEq=mat->logical_row_block_size; | ||
numComp=mat->logical_col_block_size; | ||
} | ||
const double w0 = -.25*h1/h0; | ||
const double w1 = .25; | ||
const double w2 = -.25; | ||
const double w3 = .25*h0/h1; | ||
const double w4 = -.25*h0/h1; | ||
const double w5 = .25*h1/h0; | ||
const double w6 = -.125*h1; | ||
const double w7 = -.125*h0; | ||
const double w8 = .125*h1; | ||
const double w9 = .125*h0; | ||
const double w10 = .0625*h0*h1; | ||
const double w11 = -.5*h1; | ||
const double w12 = -.5*h0; | ||
const double w13 = .5*h1; | ||
const double w14 = .5*h0; | ||
const double w15 = .25*h0*h1; | ||
2093 | ||
2094 | rhs.requireWrite(); | In a hypothetical 1-D case: |
#pragma omp parallel | ||
{ | ||
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | ||
#pragma omp for | ||
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | ||
for (index_t k0=0; k0<m_NE0; ++k0) { | ||
bool add_EM_S=false; | ||
bool add_EM_F=false; | ||
vector<double> EM_S(4*4*numEq*numComp, 0); | ||
vector<double> EM_F(4*numEq, 0); | ||
const index_t e = k0 + m_NE0*k1; | ||
/////////////// | ||
// process A // | ||
/////////////// | ||
if (!A.isEmpty()) { | ||
add_EM_S=true; | ||
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | ||
for (index_t k=0; k<numEq; k++) { | ||
for (index_t m=0; m<numComp; m++) { | ||
const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; | ||
const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; | ||
const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; | ||
const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; | ||
const double tmp0_0 = A_01 + A_10; | ||
const double tmp0_1 = A_11*w3; | ||
const double tmp1_1 = A_00*w0; | ||
const double tmp2_1 = A_01*w1; | ||
const double tmp3_1 = A_10*w2; | ||
const double tmp4_1 = tmp0_0*w1; | ||
const double tmp5_1 = A_11*w4; | ||
const double tmp6_1 = A_00*w5; | ||
const double tmp7_1 = tmp0_0*w2; | ||
const double tmp8_1 = A_10*w1; | ||
const double tmp9_1 = A_01*w2; | ||
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | ||
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | ||
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | ||
} | ||
} | ||
} | ||
/////////////// | ||
// process B // | ||
/////////////// | ||
if (!B.isEmpty()) { | ||
add_EM_S=true; | ||