/[escript]/trunk/ripley/src/DefaultAssembler2D.cpp
ViewVC logotype

Contents of /trunk/ripley/src/DefaultAssembler2D.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File size: 127723 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/DefaultAssembler2D.h>
18 #include <ripley/domainhelpers.h>
19
20 using namespace std;
21
22 using escript::AbstractSystemMatrix;
23 using escript::Data;
24
25 namespace ripley {
26
27 void DefaultAssembler2D::collateFunctionSpaceTypes(vector<int>& fsTypes,
28 const DataMap& coefs) const
29 {
30 if (isNotEmpty("A", coefs))
31 fsTypes.push_back(coefs.find("A")->second.getFunctionSpace().getTypeCode());
32 if (isNotEmpty("B", coefs))
33 fsTypes.push_back(coefs.find("B")->second.getFunctionSpace().getTypeCode());
34 if (isNotEmpty("C", coefs))
35 fsTypes.push_back(coefs.find("C")->second.getFunctionSpace().getTypeCode());
36 if (isNotEmpty("D", coefs))
37 fsTypes.push_back(coefs.find("D")->second.getFunctionSpace().getTypeCode());
38 if (isNotEmpty("X", coefs))
39 fsTypes.push_back(coefs.find("X")->second.getFunctionSpace().getTypeCode());
40 if (isNotEmpty("Y", coefs))
41 fsTypes.push_back(coefs.find("Y")->second.getFunctionSpace().getTypeCode());
42 }
43
44 void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
45 Data& rhs, const DataMap& coefs) const
46 {
47 const Data& A = unpackData("A", coefs);
48 const Data& B = unpackData("B", coefs);
49 const Data& C = unpackData("C", coefs);
50 const Data& D = unpackData("D", coefs);
51 const Data& X = unpackData("X", coefs);
52 const Data& Y = unpackData("Y", coefs);
53 assemblePDESingle(mat, rhs, A, B, C, D, X, Y);
54
55 }
56
57 void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
58 Data& rhs, const DataMap& coefs) const
59 {
60 const Data& d = unpackData("d", coefs);
61 const Data& y = unpackData("y", coefs);
62 assemblePDEBoundarySingle(mat, rhs, d, y);
63 }
64
65 void DefaultAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
66 Data& rhs, const DataMap& coefs) const
67 {
68 const Data& A = unpackData("A", coefs);
69 const Data& B = unpackData("B", coefs);
70 const Data& C = unpackData("C", coefs);
71 const Data& D = unpackData("D", coefs);
72 const Data& X = unpackData("X", coefs);
73 const Data& Y = unpackData("Y", coefs);
74 assemblePDESingleReduced(mat, rhs, A, B, C, D, X, Y);
75 }
76
77 void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
78 AbstractSystemMatrix* mat, Data& rhs,
79 const DataMap& coefs) const
80 {
81 const Data& d = unpackData("d", coefs);
82 const Data& y = unpackData("y", coefs);
83 assemblePDEBoundarySingleReduced(mat, rhs, d, y);
84 }
85
86 void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
87 Data& rhs, const DataMap& coefs) const
88 {
89 const Data& A = unpackData("A", coefs);
90 const Data& B = unpackData("B", coefs);
91 const Data& C = unpackData("C", coefs);
92 const Data& D = unpackData("D", coefs);
93 const Data& X = unpackData("X", coefs);
94 const Data& Y = unpackData("Y", coefs);
95 assemblePDESystem(mat, rhs, A, B, C, D, X, Y);
96 }
97
98 void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
99 Data& rhs, const DataMap& coefs) const
100 {
101 const Data& d = unpackData("d", coefs);
102 const Data& y = unpackData("y", coefs);
103 assemblePDEBoundarySystem(mat, rhs, d, y);
104 }
105
106 void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
107 Data& rhs, const DataMap& coefs) const
108 {
109 const Data& A = unpackData("A", coefs);
110 const Data& B = unpackData("B", coefs);
111 const Data& C = unpackData("C", coefs);
112 const Data& D = unpackData("D", coefs);
113 const Data& X = unpackData("X", coefs);
114 const Data& Y = unpackData("Y", coefs);
115 assemblePDESystemReduced(mat, rhs, A, B, C, D, X, Y);
116 }
117
118 void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
119 AbstractSystemMatrix* mat, Data& rhs,
120 const DataMap& coefs) const
121 {
122 const Data& d = unpackData("d", coefs);
123 const Data& y = unpackData("y", coefs);
124 assemblePDEBoundarySystemReduced(mat, rhs, d, y);
125 }
126
127 void DefaultAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
128 Data& rhs, const Data& A, const Data& B,
129 const Data& C, const Data& D,
130 const Data& X, const Data& Y) const
131 {
132 dim_t numEq, numComp;
133 if (!mat)
134 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
135 else {
136 numEq=mat->getRowBlockSize();
137 numComp=mat->getColumnBlockSize();
138 }
139 const double SQRT3 = 1.73205080756887719318;
140 const double w1 = 1.0/24;
141 const double w5 = -SQRT3/24 + 1.0/12;
142 const double w2 = -SQRT3/24 - 1.0/12;
143 const double w19 = -m_dx[0]/12;
144 const double w11 = w19*(SQRT3 + 3)/12;
145 const double w14 = w19*(-SQRT3 + 3)/12;
146 const double w16 = w19*(5*SQRT3 + 9)/12;
147 const double w17 = w19*(-5*SQRT3 + 9)/12;
148 const double w27 = w19*(-SQRT3 - 3)/2;
149 const double w28 = w19*(SQRT3 - 3)/2;
150 const double w18 = -m_dx[1]/12;
151 const double w10 = w18*(SQRT3 + 3)/12;
152 const double w15 = w18*(-SQRT3 + 3)/12;
153 const double w12 = w18*(5*SQRT3 + 9)/12;
154 const double w13 = w18*(-5*SQRT3 + 9)/12;
155 const double w25 = w18*(-SQRT3 - 3)/2;
156 const double w26 = w18*(SQRT3 - 3)/2;
157 const double w22 = m_dx[0]*m_dx[1]/144;
158 const double w20 = w22*(SQRT3 + 2);
159 const double w21 = w22*(-SQRT3 + 2);
160 const double w23 = w22*(4*SQRT3 + 7);
161 const double w24 = w22*(-4*SQRT3 + 7);
162 const double w3 = m_dx[0]/(24*m_dx[1]);
163 const double w7 = w3*(SQRT3 + 2);
164 const double w8 = w3*(-SQRT3 + 2);
165 const double w6 = -m_dx[1]/(24*m_dx[0]);
166 const double w0 = w6*(SQRT3 + 2);
167 const double w4 = w6*(-SQRT3 + 2);
168 const int NE0 = m_NE[0];
169 const int NE1 = m_NE[1];
170 rhs.requireWrite();
171
172 #pragma omp parallel
173 {
174 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
175 #pragma omp for
176 for (index_t k1=k1_0; k1 < NE1; k1+=2) {
177 for (index_t k0=0; k0 < NE0; ++k0) {
178 bool addEM_S=false;
179 bool addEM_F=false;
180 vector<double> EM_S(4*4*numEq*numComp, 0);
181 vector<double> EM_F(4*numEq, 0);
182 const index_t e = k0 + NE0*k1;
183 ///////////////
184 // process A //
185 ///////////////
186 if (!A.isEmpty()) {
187 addEM_S = true;
188 const double* A_p = A.getSampleDataRO(e);
189 if (A.actsExpanded()) {
190 for (index_t k=0; k<numEq; k++) {
191 for (index_t m=0; m<numComp; m++) {
192 const double A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,2,numComp,2)];
193 const double A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,2,numComp,2)];
194 const double A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,2,numComp,2)];
195 const double A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,2,numComp,2)];
196 const double A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,2,numComp,2)];
197 const double A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,2,numComp,2)];
198 const double A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,2,numComp,2)];
199 const double A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,2,numComp,2)];
200 const double A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,2,numComp,2)];
201 const double A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,2,numComp,2)];
202 const double A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,2,numComp,2)];
203 const double A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,2,numComp,2)];
204 const double A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,2,numComp,2)];
205 const double A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,2,numComp,2)];
206 const double A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,2,numComp,2)];
207 const double A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,2,numComp,2)];
208 const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
209 const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
210 const double tmp2 = w4*(A_00_2 + A_00_3);
211 const double tmp3 = w0*(A_00_0 + A_00_1);
212 const double tmp4 = w5*(A_01_2 - A_10_3);
213 const double tmp5 = w2*(-A_01_1 + A_10_0);
214 const double tmp6 = w5*(A_01_3 + A_10_0);
215 const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
216 const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
217 const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
218 const double tmp10 = w2*(-A_01_0 - A_10_3);
219 const double tmp11 = w4*(A_00_0 + A_00_1);
220 const double tmp12 = w0*(A_00_2 + A_00_3);
221 const double tmp13 = w5*(A_01_1 - A_10_0);
222 const double tmp14 = w2*(-A_01_2 + A_10_3);
223 const double tmp15 = w7*(A_11_0 + A_11_2);
224 const double tmp16 = w4*(-A_00_2 - A_00_3);
225 const double tmp17 = w0*(-A_00_0 - A_00_1);
226 const double tmp18 = w5*(A_01_3 + A_10_3);
227 const double tmp19 = w8*(A_11_1 + A_11_3);
228 const double tmp20 = w2*(-A_01_0 - A_10_0);
229 const double tmp21 = w7*(A_11_1 + A_11_3);
230 const double tmp22 = w4*(-A_00_0 - A_00_1);
231 const double tmp23 = w0*(-A_00_2 - A_00_3);
232 const double tmp24 = w5*(A_01_0 + A_10_0);
233 const double tmp25 = w8*(A_11_0 + A_11_2);
234 const double tmp26 = w2*(-A_01_3 - A_10_3);
235 const double tmp27 = w5*(-A_01_1 - A_10_2);
236 const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
237 const double tmp29 = w2*(A_01_2 + A_10_1);
238 const double tmp30 = w7*(-A_11_1 - A_11_3);
239 const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
240 const double tmp32 = w5*(-A_01_0 + A_10_2);
241 const double tmp33 = w8*(-A_11_0 - A_11_2);
242 const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
243 const double tmp35 = w2*(A_01_3 - A_10_1);
244 const double tmp36 = w5*(A_01_0 + A_10_3);
245 const double tmp37 = w2*(-A_01_3 - A_10_0);
246 const double tmp38 = w7*(-A_11_0 - A_11_2);
247 const double tmp39 = w5*(-A_01_3 + A_10_1);
248 const double tmp40 = w8*(-A_11_1 - A_11_3);
249 const double tmp41 = w2*(A_01_0 - A_10_2);
250 const double tmp42 = w5*(A_01_1 - A_10_3);
251 const double tmp43 = w2*(-A_01_2 + A_10_0);
252 const double tmp44 = w5*(A_01_2 - A_10_0);
253 const double tmp45 = w2*(-A_01_1 + A_10_3);
254 const double tmp46 = w5*(-A_01_0 + A_10_1);
255 const double tmp47 = w2*(A_01_3 - A_10_2);
256 const double tmp48 = w5*(-A_01_1 - A_10_1);
257 const double tmp49 = w2*(A_01_2 + A_10_2);
258 const double tmp50 = w5*(-A_01_3 + A_10_2);
259 const double tmp51 = w2*(A_01_0 - A_10_1);
260 const double tmp52 = w5*(-A_01_2 - A_10_1);
261 const double tmp53 = w2*(A_01_1 + A_10_2);
262 const double tmp54 = w5*(-A_01_2 - A_10_2);
263 const double tmp55 = w2*(A_01_1 + A_10_1);
264 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
265 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
266 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
267 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp28 + tmp52 + tmp53 + tmp7 + tmp8;
268 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp2 + tmp3 + tmp31 + tmp50 + tmp51;
269 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp16 + tmp17 + tmp21 + tmp25 + tmp28 + tmp54 + tmp55;
270 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10 + tmp6 + tmp7 + tmp8 + tmp9;
271 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp1 + tmp30 + tmp33 + tmp34 + tmp44 + tmp45;
272 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp1 + tmp34 + tmp38 + tmp40 + tmp42 + tmp43;
273 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp36 + tmp37 + tmp7 + tmp8 + tmp9;
274 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp15 + tmp19 + tmp22 + tmp23 + tmp28 + tmp48 + tmp49;
275 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0 + tmp11 + tmp12 + tmp31 + tmp46 + tmp47;
276 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp27 + tmp28 + tmp29 + tmp7 + tmp8;
277 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp30 + tmp31 + tmp32 + tmp33 + tmp34 + tmp35;
278 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
279 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
280 }
281 }
282 } else { // constant data
283 for (index_t k=0; k<numEq; k++) {
284 for (index_t m=0; m<numComp; m++) {
285 const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)];
286 const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)];
287 const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)];
288 const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)];
289 const double tmp0 = 6*w1*(A_01 - A_10);
290 const double tmp1 = 6*w1*(A_01 + A_10);
291 const double tmp2 = 6*w1*(-A_01 - A_10);
292 const double tmp3 = 6*w1*(-A_01 + A_10);
293 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
294 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
295 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
296 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
297 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
298 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
299 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
300 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
301 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
302 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
303 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
304 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
305 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
306 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
307 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
308 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
309 }
310 }
311 }
312 }
313 ///////////////
314 // process B //
315 ///////////////
316 if (!B.isEmpty()) {
317 addEM_S=true;
318 const double* B_p=B.getSampleDataRO(e);
319 if (B.actsExpanded()) {
320 for (index_t k=0; k<numEq; k++) {
321 for (index_t m=0; m<numComp; m++) {
322 const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
323 const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
324 const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
325 const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
326 const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
327 const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
328 const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
329 const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
330 const double tmp0 = w11*(B_1_0 + B_1_1);
331 const double tmp1 = w14*(B_1_2 + B_1_3);
332 const double tmp2 = w15*(-B_0_1 - B_0_3);
333 const double tmp3 = w10*(-B_0_0 - B_0_2);
334 const double tmp4 = w11*(B_1_2 + B_1_3);
335 const double tmp5 = w14*(B_1_0 + B_1_1);
336 const double tmp6 = w11*(-B_1_2 - B_1_3);
337 const double tmp7 = w14*(-B_1_0 - B_1_1);
338 const double tmp8 = w11*(-B_1_0 - B_1_1);
339 const double tmp9 = w14*(-B_1_2 - B_1_3);
340 const double tmp10 = w10*(-B_0_1 - B_0_3);
341 const double tmp11 = w15*(-B_0_0 - B_0_2);
342 const double tmp12 = w15*(B_0_0 + B_0_2);
343 const double tmp13 = w10*(B_0_1 + B_0_3);
344 const double tmp14 = w10*(B_0_0 + B_0_2);
345 const double tmp15 = w15*(B_0_1 + B_0_3);
346 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
347 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
348 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
349 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
350 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-B_0_0*w12 - B_0_1*w10 - B_0_2*w15 - B_0_3*w13 + tmp0 + tmp1;
351 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-B_0_0*w10 - B_0_1*w12 - B_0_2*w13 - B_0_3*w15 + B_1_0*w14 + B_1_1*w16 + B_1_2*w17 + B_1_3*w11;
352 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
353 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=B_1_0*w17 + B_1_1*w11 + B_1_2*w14 + B_1_3*w16 + tmp10 + tmp11;
354 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-B_1_0*w16 - B_1_1*w14 - B_1_2*w11 - B_1_3*w17 + tmp14 + tmp15;
355 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
356 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=B_0_0*w15 + B_0_1*w13 + B_0_2*w12 + B_0_3*w10 - B_1_0*w11 - B_1_1*w17 - B_1_2*w16 - B_1_3*w14;
357 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=B_0_0*w13 + B_0_1*w15 + B_0_2*w10 + B_0_3*w12 + tmp6 + tmp7;
358 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
359 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-B_1_0*w14 - B_1_1*w16 - B_1_2*w17 - B_1_3*w11 + tmp10 + tmp11;
360 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-B_0_0*w15 - B_0_1*w13 - B_0_2*w12 - B_0_3*w10 + tmp6 + tmp7;
361 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-B_0_0*w13 - B_0_1*w15 - B_0_2*w10 - B_0_3*w12 - B_1_0*w17 - B_1_1*w11 - B_1_2*w14 - B_1_3*w16;
362 }
363 }
364 } else { // constant data
365 for (index_t k=0; k<numEq; k++) {
366 for (index_t m=0; m<numComp; m++) {
367 const double wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
368 const double wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
369 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wB0 + 2*wB1;
370 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2*wB0 + wB1;
371 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wB0 + 2*wB1;
372 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wB0 + wB1;
373 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2*wB0 + wB1;
374 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wB0 + 2*wB1;
375 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= -wB0 + wB1;
376 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wB0 + 2*wB1;
377 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wB0 - 2*wB1;
378 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wB0 - wB1;
379 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wB0 - 2*wB1;
380 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2*wB0 - wB1;
381 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= -wB0 - wB1;
382 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wB0 - 2*wB1;
383 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2*wB0 - wB1;
384 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wB0 - 2*wB1;
385 }
386 }
387 }
388 }
389 ///////////////
390 // process C //
391 ///////////////
392 if (!C.isEmpty()) {
393 addEM_S=true;
394 const double* C_p=C.getSampleDataRO(e);
395 if (C.actsExpanded()) {
396 for (index_t k=0; k<numEq; k++) {
397 for (index_t m=0; m<numComp; m++) {
398 const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
399 const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
400 const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
401 const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
402 const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
403 const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
404 const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
405 const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
406 const double tmp0 = w11*(C_1_0 + C_1_1);
407 const double tmp1 = w14*(C_1_2 + C_1_3);
408 const double tmp2 = w15*(C_0_0 + C_0_2);
409 const double tmp3 = w10*(C_0_1 + C_0_3);
410 const double tmp4 = w11*(-C_1_0 - C_1_1);
411 const double tmp5 = w14*(-C_1_2 - C_1_3);
412 const double tmp6 = w11*(-C_1_2 - C_1_3);
413 const double tmp7 = w14*(-C_1_0 - C_1_1);
414 const double tmp8 = w11*(C_1_2 + C_1_3);
415 const double tmp9 = w14*(C_1_0 + C_1_1);
416 const double tmp10 = w10*(-C_0_1 - C_0_3);
417 const double tmp11 = w15*(-C_0_0 - C_0_2);
418 const double tmp12 = w15*(-C_0_1 - C_0_3);
419 const double tmp13 = w10*(-C_0_0 - C_0_2);
420 const double tmp14 = w10*(C_0_0 + C_0_2);
421 const double tmp15 = w15*(C_0_1 + C_0_3);
422 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
423 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
424 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
425 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
426 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=C_0_0*w10 + C_0_1*w12 + C_0_2*w13 + C_0_3*w15 + tmp0 + tmp1;
427 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-C_0_0*w10 - C_0_1*w12 - C_0_2*w13 - C_0_3*w15 + C_1_0*w14 + C_1_1*w16 + C_1_2*w17 + C_1_3*w11;
428 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
429 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-C_1_0*w14 - C_1_1*w16 - C_1_2*w17 - C_1_3*w11 + tmp10 + tmp11;
430 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=C_1_0*w11 + C_1_1*w17 + C_1_2*w16 + C_1_3*w14 + tmp14 + tmp15;
431 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
432 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=C_0_0*w15 + C_0_1*w13 + C_0_2*w12 + C_0_3*w10 - C_1_0*w11 - C_1_1*w17 - C_1_2*w16 - C_1_3*w14;
433 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-C_0_0*w15 - C_0_1*w13 - C_0_2*w12 - C_0_3*w10 + tmp6 + tmp7;
434 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
435 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=C_1_0*w17 + C_1_1*w11 + C_1_2*w14 + C_1_3*w16 + tmp10 + tmp11;
436 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=C_0_0*w13 + C_0_1*w15 + C_0_2*w10 + C_0_3*w12 + tmp6 + tmp7;
437 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-C_0_0*w13 - C_0_1*w15 - C_0_2*w10 - C_0_3*w12 - C_1_0*w17 - C_1_1*w11 - C_1_2*w14 - C_1_3*w16;
438 }
439 }
440 } else { // constant data
441 for (index_t k=0; k<numEq; k++) {
442 for (index_t m=0; m<numComp; m++) {
443 const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
444 const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
445 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wC0 + 2*wC1;
446 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2*wC0 + wC1;
447 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wC0 - 2*wC1;
448 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= -wC0 - wC1;
449 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2*wC0 + wC1;
450 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wC0 + 2*wC1;
451 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wC0 - wC1;
452 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wC0 - 2*wC1;
453 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wC0 + 2*wC1;
454 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= -wC0 + wC1;
455 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wC0 - 2*wC1;
456 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2*wC0 - wC1;
457 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wC0 + wC1;
458 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wC0 + 2*wC1;
459 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2*wC0 - wC1;
460 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wC0 - 2*wC1;
461 }
462 }
463 }
464 }
465 ///////////////
466 // process D //
467 ///////////////
468 if (!D.isEmpty()) {
469 addEM_S=true;
470 const double* D_p=D.getSampleDataRO(e);
471 if (D.actsExpanded()) {
472 for (index_t k=0; k<numEq; k++) {
473 for (index_t m=0; m<numComp; m++) {
474 const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
475 const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
476 const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
477 const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
478 const double tmp0 = w21*(D_2 + D_3);
479 const double tmp1 = w20*(D_0 + D_1);
480 const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
481 const double tmp3 = w21*(D_0 + D_1);
482 const double tmp4 = w20*(D_2 + D_3);
483 const double tmp5 = w22*(D_1 + D_2);
484 const double tmp6 = w21*(D_0 + D_2);
485 const double tmp7 = w20*(D_1 + D_3);
486 const double tmp8 = w21*(D_1 + D_3);
487 const double tmp9 = w20*(D_0 + D_2);
488 const double tmp10 = w22*(D_0 + D_3);
489 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=D_0*w23 + D_3*w24 + tmp5;
490 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1;
491 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp8 + tmp9;
492 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2;
493 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp1;
494 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=D_1*w23 + D_2*w24 + tmp10;
495 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2;
496 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6 + tmp7;
497 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp8 + tmp9;
498 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2;
499 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=D_1*w24 + D_2*w23 + tmp10;
500 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3 + tmp4;
501 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2;
502 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp6 + tmp7;
503 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3 + tmp4;
504 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=D_0*w24 + D_3*w23 + tmp5;
505 }
506 }
507 } else { // constant data
508 for (index_t k=0; k<numEq; k++) {
509 for (index_t m=0; m<numComp; m++) {
510 const double D_0 = D_p[INDEX2(k, m, numEq)];
511 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16*D_0*w22;
512 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*D_0*w22;
513 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=8*D_0*w22;
514 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*D_0*w22;
515 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*D_0*w22;
516 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16*D_0*w22;
517 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*D_0*w22;
518 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=8*D_0*w22;
519 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=8*D_0*w22;
520 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*D_0*w22;
521 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16*D_0*w22;
522 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*D_0*w22;
523 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*D_0*w22;
524 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=8*D_0*w22;
525 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*D_0*w22;
526 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16*D_0*w22;
527 }
528 }
529 }
530 }
531 ///////////////
532 // process X //
533 ///////////////
534 if (!X.isEmpty()) {
535 addEM_F=true;
536 const double* X_p=X.getSampleDataRO(e);
537 if (X.actsExpanded()) {
538 for (index_t k=0; k<numEq; k++) {
539 const double X_0_0 = X_p[INDEX3(k,0,0,numEq,2)];
540 const double X_1_0 = X_p[INDEX3(k,1,0,numEq,2)];
541 const double X_0_1 = X_p[INDEX3(k,0,1,numEq,2)];
542 const double X_1_1 = X_p[INDEX3(k,1,1,numEq,2)];
543 const double X_0_2 = X_p[INDEX3(k,0,2,numEq,2)];
544 const double X_1_2 = X_p[INDEX3(k,1,2,numEq,2)];
545 const double X_0_3 = X_p[INDEX3(k,0,3,numEq,2)];
546 const double X_1_3 = X_p[INDEX3(k,1,3,numEq,2)];
547 const double tmp0 = 6*w15*(X_0_2 + X_0_3);
548 const double tmp1 = 6*w10*(X_0_0 + X_0_1);
549 const double tmp2 = 6*w11*(X_1_0 + X_1_2);
550 const double tmp3 = 6*w14*(X_1_1 + X_1_3);
551 const double tmp4 = 6*w11*(X_1_1 + X_1_3);
552 const double tmp5 = w25*(X_0_0 + X_0_1);
553 const double tmp6 = w26*(X_0_2 + X_0_3);
554 const double tmp7 = 6*w14*(X_1_0 + X_1_2);
555 const double tmp8 = w27*(X_1_0 + X_1_2);
556 const double tmp9 = w28*(X_1_1 + X_1_3);
557 const double tmp10 = w25*(-X_0_2 - X_0_3);
558 const double tmp11 = w26*(-X_0_0 - X_0_1);
559 const double tmp12 = w27*(X_1_1 + X_1_3);
560 const double tmp13 = w28*(X_1_0 + X_1_2);
561 const double tmp14 = w25*(X_0_2 + X_0_3);
562 const double tmp15 = w26*(X_0_0 + X_0_1);
563 EM_F[INDEX2(k,0,numEq)]+=tmp0 + tmp1 + tmp2 + tmp3;
564 EM_F[INDEX2(k,1,numEq)]+=tmp4 + tmp5 + tmp6 + tmp7;
565 EM_F[INDEX2(k,2,numEq)]+=tmp10 + tmp11 + tmp8 + tmp9;
566 EM_F[INDEX2(k,3,numEq)]+=tmp12 + tmp13 + tmp14 + tmp15;
567 }
568 } else { // constant data
569 for (index_t k=0; k<numEq; k++) {
570 const double wX0 = X_p[INDEX2(k, 0, numEq)]*w18;
571 const double wX1 = X_p[INDEX2(k, 1, numEq)]*w19;
572 EM_F[INDEX2(k,0,numEq)]+= 6*wX0 + 6*wX1;
573 EM_F[INDEX2(k,1,numEq)]+=-6*wX0 + 6*wX1;
574 EM_F[INDEX2(k,2,numEq)]+= 6*wX0 - 6*wX1;
575 EM_F[INDEX2(k,3,numEq)]+=-6*wX0 - 6*wX1;
576 }
577 }
578 }
579 ///////////////
580 // process Y //
581 ///////////////
582 if (!Y.isEmpty()) {
583 addEM_F=true;
584 const double* Y_p=Y.getSampleDataRO(e);
585 if (Y.actsExpanded()) {
586 for (index_t k=0; k<numEq; k++) {
587 const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
588 const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
589 const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
590 const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
591 const double tmp0 = 6*w22*(Y_1 + Y_2);
592 const double tmp1 = 6*w22*(Y_0 + Y_3);
593 EM_F[INDEX2(k,0,numEq)]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
594 EM_F[INDEX2(k,1,numEq)]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
595 EM_F[INDEX2(k,2,numEq)]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
596 EM_F[INDEX2(k,3,numEq)]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
597 }
598 } else { // constant data
599 for (index_t k=0; k<numEq; k++) {
600 EM_F[INDEX2(k,0,numEq)]+=36*Y_p[k]*w22;
601 EM_F[INDEX2(k,1,numEq)]+=36*Y_p[k]*w22;
602 EM_F[INDEX2(k,2,numEq)]+=36*Y_p[k]*w22;
603 EM_F[INDEX2(k,3,numEq)]+=36*Y_p[k]*w22;
604 }
605 }
606 }
607
608 // add to matrix (if addEM_S) and RHS (if addEM_F)
609 const index_t firstNode=m_NN[0]*k1+k0;
610 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
611 addEM_F, firstNode, numEq, numComp);
612 } // end k0 loop
613 } // end k1 loop
614 } // end of colouring
615 } // end of parallel region
616 }
617
618 void DefaultAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
619 Data& rhs, const Data& A, const Data& B,
620 const Data& C, const Data& D,
621 const Data& X, const Data& Y) const
622 {
623 dim_t numEq, numComp;
624 if (!mat)
625 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
626 else {
627 numEq=mat->getRowBlockSize();
628 numComp=mat->getColumnBlockSize();
629 }
630
631 const double w0 = 1./4;
632 const double w1 = m_dx[0]/8;
633 const double w2 = m_dx[1]/8;
634 const double w3 = m_dx[0]*m_dx[1]/16;
635 const double w4 = m_dx[0]/(4*m_dx[1]);
636 const double w5 = m_dx[1]/(4*m_dx[0]);
637 const int NE0 = m_NE[0];
638 const int NE1 = m_NE[1];
639 rhs.requireWrite();
640
641 #pragma omp parallel
642 {
643 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
644 #pragma omp for
645 for (index_t k1=k1_0; k1<NE1; k1+=2) {
646 for (index_t k0=0; k0<NE0; ++k0) {
647 bool addEM_S=false;
648 bool addEM_F=false;
649 vector<double> EM_S(4*4*numEq*numComp, 0);
650 vector<double> EM_F(4*numEq, 0);
651 const index_t e = k0 + NE0*k1;
652 ///////////////
653 // process A //
654 ///////////////
655 if (!A.isEmpty()) {
656 addEM_S=true;
657 const double* A_p=A.getSampleDataRO(e);
658 for (index_t k=0; k<numEq; k++) {
659 for (index_t m=0; m<numComp; m++) {
660 const double Aw00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]*w5;
661 const double Aw01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]*w0;
662 const double Aw10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]*w0;
663 const double Aw11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]*w4;
664 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= Aw00 + Aw01 + Aw10 + Aw11;
665 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-Aw00 - Aw01 + Aw10 + Aw11;
666 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= Aw00 + Aw01 - Aw10 - Aw11;
667 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=-Aw00 - Aw01 - Aw10 - Aw11;
668 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-Aw00 + Aw01 - Aw10 + Aw11;
669 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= Aw00 - Aw01 - Aw10 + Aw11;
670 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=-Aw00 + Aw01 + Aw10 - Aw11;
671 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= Aw00 - Aw01 + Aw10 - Aw11;
672 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= Aw00 - Aw01 + Aw10 - Aw11;
673 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=-Aw00 + Aw01 + Aw10 - Aw11;
674 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= Aw00 - Aw01 - Aw10 + Aw11;
675 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-Aw00 + Aw01 - Aw10 + Aw11;
676 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=-Aw00 - Aw01 - Aw10 - Aw11;
677 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= Aw00 + Aw01 - Aw10 - Aw11;
678 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-Aw00 - Aw01 + Aw10 + Aw11;
679 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= Aw00 + Aw01 + Aw10 + Aw11;
680 }
681 }
682 }
683 ///////////////
684 // process B //
685 ///////////////
686 if (!B.isEmpty()) {
687 addEM_S=true;
688 const double* B_p=B.getSampleDataRO(e);
689 for (index_t k=0; k<numEq; k++) {
690 for (index_t m=0; m<numComp; m++) {
691 const double wB0 = B_p[INDEX3(k,0,m, numEq, 2)]*w2;
692 const double wB1 = B_p[INDEX3(k,1,m, numEq, 2)]*w1;
693 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wB0 - wB1;
694 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-wB0 - wB1;
695 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-wB0 - wB1;
696 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=-wB0 - wB1;
697 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= wB0 - wB1;
698 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= wB0 - wB1;
699 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wB0 - wB1;
700 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= wB0 - wB1;
701 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-wB0 + wB1;
702 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=-wB0 + wB1;
703 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-wB0 + wB1;
704 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-wB0 + wB1;
705 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wB0 + wB1;
706 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= wB0 + wB1;
707 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= wB0 + wB1;
708 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= wB0 + wB1;
709 }
710 }
711 }
712 ///////////////
713 // process C //
714 ///////////////
715 if (!C.isEmpty()) {
716 addEM_S=true;
717 const double* C_p=C.getSampleDataRO(e);
718 for (index_t k=0; k<numEq; k++) {
719 for (index_t m=0; m<numComp; m++) {
720 const double wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w2;
721 const double wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w1;
722 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wC0 - wC1;
723 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-wC0 - wC1;
724 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-wC0 - wC1;
725 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=-wC0 - wC1;
726 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= wC0 - wC1;
727 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= wC0 - wC1;
728 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wC0 - wC1;
729 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= wC0 - wC1;
730 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-wC0 + wC1;
731 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=-wC0 + wC1;
732 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-wC0 + wC1;
733 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-wC0 + wC1;
734 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wC0 + wC1;
735 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= wC0 + wC1;
736 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= wC0 + wC1;
737 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= wC0 + wC1;
738 }
739 }
740 }
741 ///////////////
742 // process D //
743 ///////////////
744 if (!D.isEmpty()) {
745 addEM_S=true;
746 const double* D_p=D.getSampleDataRO(e);
747 for (index_t k=0; k<numEq; k++) {
748 for (index_t m=0; m<numComp; m++) {
749 const double wD0 = D_p[INDEX2(k, m, numEq)]*w3;
750 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=wD0;
751 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=wD0;
752 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=wD0;
753 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=wD0;
754 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=wD0;
755 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=wD0;
756 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=wD0;
757 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=wD0;
758 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=wD0;
759 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=wD0;
760 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=wD0;
761 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=wD0;
762 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=wD0;
763 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=wD0;
764 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=wD0;
765 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=wD0;
766 }
767 }
768 }
769 ///////////////
770 // process X //
771 ///////////////
772 if (!X.isEmpty()) {
773 addEM_F=true;
774 const double* X_p=X.getSampleDataRO(e);
775 for (index_t k=0; k<numEq; k++) {
776 const double wX0 = 4*X_p[INDEX2(k, 0, numEq)]*w2;
777 const double wX1 = 4*X_p[INDEX2(k, 1, numEq)]*w1;
778 EM_F[INDEX2(k,0,numEq)]+=-wX0 - wX1;
779 EM_F[INDEX2(k,1,numEq)]+= wX0 - wX1;
780 EM_F[INDEX2(k,2,numEq)]+=-wX0 + wX1;
781 EM_F[INDEX2(k,3,numEq)]+= wX0 + wX1;
782 }
783 }
784 ///////////////
785 // process Y //
786 ///////////////
787 if (!Y.isEmpty()) {
788 addEM_F=true;
789 const double* Y_p=Y.getSampleDataRO(e);
790 for (index_t k=0; k<numEq; k++) {
791 EM_F[INDEX2(k,0,numEq)]+=4*Y_p[k]*w3;
792 EM_F[INDEX2(k,1,numEq)]+=4*Y_p[k]*w3;
793 EM_F[INDEX2(k,2,numEq)]+=4*Y_p[k]*w3;
794 EM_F[INDEX2(k,3,numEq)]+=4*Y_p[k]*w3;
795 }
796 }
797
798 // add to matrix (if addEM_S) and RHS (if addEM_F)
799 const index_t firstNode=m_NN[0]*k1+k0;
800 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
801 firstNode, numEq, numComp);
802 } // end k0 loop
803 } // end k1 loop
804 } // end of colouring
805 } // end of parallel region
806 }
807
808 void DefaultAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
809 Data& rhs, const Data& d, const Data& y) const
810 {
811 const double SQRT3 = 1.73205080756887719318;
812 const double w5 = m_dx[0]/12;
813 const double w6 = w5*(SQRT3 + 2);
814 const double w7 = w5*(-SQRT3 + 2);
815 const double w8 = w5*(SQRT3 + 3);
816 const double w9 = w5*(-SQRT3 + 3);
817 const double w2 = m_dx[1]/12;
818 const double w0 = w2*(SQRT3 + 2);
819 const double w1 = w2*(-SQRT3 + 2);
820 const double w3 = w2*(SQRT3 + 3);
821 const double w4 = w2*(-SQRT3 + 3);
822 const bool addEM_S=!d.isEmpty();
823 const bool addEM_F=!y.isEmpty();
824 const int NE0 = m_NE[0];
825 const int NE1 = m_NE[1];
826 rhs.requireWrite();
827
828 #pragma omp parallel
829 {
830 if (domain->m_faceOffset[0] > -1) {
831 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
832 #pragma omp for
833 for (index_t k1=k1_0; k1<NE1; k1+=2) {
834 vector<double> EM_S(4*4, 0);
835 vector<double> EM_F(4, 0);
836 ///////////////
837 // process d //
838 ///////////////
839 if (addEM_S) {
840 const double* d_p=d.getSampleDataRO(k1);
841 if (d.actsExpanded()) {
842 const double d_0 = d_p[0];
843 const double d_1 = d_p[1];
844 const double tmp0 = w2*(d_0 + d_1);
845 EM_S[INDEX2(0,0,4)]+=d_0*w0 + d_1*w1;
846 EM_S[INDEX2(2,0,4)]+=tmp0;
847 EM_S[INDEX2(0,2,4)]+=tmp0;
848 EM_S[INDEX2(2,2,4)]+=d_0*w1 + d_1*w0;
849 } else { // constant data
850 EM_S[INDEX2(0,0,4)]+=4*d_p[0]*w2;
851 EM_S[INDEX2(2,0,4)]+=2*d_p[0]*w2;
852 EM_S[INDEX2(0,2,4)]+=2*d_p[0]*w2;
853 EM_S[INDEX2(2,2,4)]+=4*d_p[0]*w2;
854 }
855 }
856 ///////////////
857 // process y //
858 ///////////////
859 if (addEM_F) {
860 const double* y_p=y.getSampleDataRO(k1);
861 if (y.actsExpanded()) {
862 EM_F[0]+=w3*y_p[0] + w4*y_p[1];
863 EM_F[2]+=w3*y_p[1] + w4*y_p[0];
864 } else { // constant data
865 EM_F[0]+=6*w2*y_p[0];
866 EM_F[2]+=6*w2*y_p[0];
867 }
868 }
869 const index_t firstNode=m_NN[0]*k1;
870 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
871 }
872 } // end colouring
873 }
874
875 if (domain->m_faceOffset[1] > -1) {
876 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
877 #pragma omp for
878 for (index_t k1=k1_0; k1<NE1; k1+=2) {
879 vector<double> EM_S(4*4, 0);
880 vector<double> EM_F(4, 0);
881 const index_t e = domain->m_faceOffset[1]+k1;
882 ///////////////
883 // process d //
884 ///////////////
885 if (addEM_S) {
886 const double* d_p=d.getSampleDataRO(e);
887 if (d.actsExpanded()) {
888 const double d_0 = d_p[0];
889 const double d_1 = d_p[1];
890 const double tmp0 = w2*(d_0 + d_1);
891 EM_S[INDEX2(1,1,4)]+=d_0*w0 + d_1*w1;
892 EM_S[INDEX2(3,1,4)]+=tmp0;
893 EM_S[INDEX2(1,3,4)]+=tmp0;
894 EM_S[INDEX2(3,3,4)]+=d_0*w1 + d_1*w0;
895 } else { // constant data
896 EM_S[INDEX2(1,1,4)]+=4*d_p[0]*w2;
897 EM_S[INDEX2(3,1,4)]+=2*d_p[0]*w2;
898 EM_S[INDEX2(1,3,4)]+=2*d_p[0]*w2;
899 EM_S[INDEX2(3,3,4)]+=4*d_p[0]*w2;
900 }
901 }
902 ///////////////
903 // process y //
904 ///////////////
905 if (addEM_F) {
906 const double* y_p=y.getSampleDataRO(e);
907 if (y.actsExpanded()) {
908 EM_F[1]+=w3*y_p[0] + w4*y_p[1];
909 EM_F[3]+=w3*y_p[1] + w4*y_p[0];
910 } else { // constant data
911 EM_F[1]+=6*w2*y_p[0];
912 EM_F[3]+=6*w2*y_p[0];
913 }
914 }
915 const index_t firstNode=m_NN[0]*(k1+1)-2;
916 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
917 }
918 } // end colouring
919 }
920
921 if (domain->m_faceOffset[2] > -1) {
922 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
923 #pragma omp for
924 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
925 vector<double> EM_S(4*4, 0);
926 vector<double> EM_F(4, 0);
927 const index_t e = domain->m_faceOffset[2]+k0;
928 ///////////////
929 // process d //
930 ///////////////
931 if (addEM_S) {
932 const double* d_p=d.getSampleDataRO(e);
933 if (d.actsExpanded()) {
934 const double d_0 = d_p[0];
935 const double d_1 = d_p[1];
936 const double tmp0 = w5*(d_0 + d_1);
937 EM_S[INDEX2(0,0,4)]+=d_0*w6 + d_1*w7;
938 EM_S[INDEX2(1,0,4)]+=tmp0;
939 EM_S[INDEX2(0,1,4)]+=tmp0;
940 EM_S[INDEX2(1,1,4)]+=d_0*w7 + d_1*w6;
941 } else { // constant data
942 EM_S[INDEX2(0,0,4)]+=4*d_p[0]*w5;
943 EM_S[INDEX2(1,0,4)]+=2*d_p[0]*w5;
944 EM_S[INDEX2(0,1,4)]+=2*d_p[0]*w5;
945 EM_S[INDEX2(1,1,4)]+=4*d_p[0]*w5;
946 }
947 }
948 ///////////////
949 // process y //
950 ///////////////
951 if (addEM_F) {
952 const double* y_p=y.getSampleDataRO(e);
953 if (y.actsExpanded()) {
954 EM_F[0]+=w8*y_p[0] + w9*y_p[1];
955 EM_F[1]+=w8*y_p[1] + w9*y_p[0];
956 } else { // constant data
957 EM_F[0]+=6*w5*y_p[0];
958 EM_F[1]+=6*w5*y_p[0];
959 }
960 }
961 const index_t firstNode=k0;
962 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
963 }
964 } // end colouring
965 }
966
967 if (domain->m_faceOffset[3] > -1) {
968 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
969 #pragma omp for
970 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
971 const index_t e = domain->m_faceOffset[3]+k0;
972 vector<double> EM_S(4*4, 0);
973 vector<double> EM_F(4, 0);
974 ///////////////
975 // process d //
976 ///////////////
977 if (addEM_S) {
978 const double* d_p=d.getSampleDataRO(e);
979 if (d.actsExpanded()) {
980 const double d_0 = d_p[0];
981 const double d_1 = d_p[1];
982 const double tmp0 = w5*(d_0 + d_1);
983 EM_S[INDEX2(2,2,4)]+=d_0*w6 + d_1*w7;
984 EM_S[INDEX2(3,2,4)]+=tmp0;
985 EM_S[INDEX2(2,3,4)]+=tmp0;
986 EM_S[INDEX2(3,3,4)]+=d_0*w7 + d_1*w6;
987 } else { // constant data
988 EM_S[INDEX2(2,2,4)]+=4*d_p[0]*w5;
989 EM_S[INDEX2(3,2,4)]+=2*d_p[0]*w5;
990 EM_S[INDEX2(2,3,4)]+=2*d_p[0]*w5;
991 EM_S[INDEX2(3,3,4)]+=4*d_p[0]*w5;
992 }
993 }
994 ///////////////
995 // process y //
996 ///////////////
997 if (addEM_F) {
998 const double* y_p=y.getSampleDataRO(e);
999 if (y.actsExpanded()) {
1000 EM_F[2]+=w8*y_p[0] + w9*y_p[1];
1001 EM_F[3]+=w8*y_p[1] + w9*y_p[0];
1002 } else { // constant data
1003 EM_F[2]+=6*w5*y_p[0];
1004 EM_F[3]+=6*w5*y_p[0];
1005 }
1006 }
1007 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1008 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1009 }
1010 } // end colouring
1011 }
1012 } // end of parallel section
1013 }
1014
1015 void DefaultAssembler2D::assemblePDEBoundarySingleReduced(
1016 AbstractSystemMatrix* mat, Data& rhs,
1017 const Data& d, const Data& y) const
1018 {
1019 const double w0 = m_dx[0]/4;
1020 const double w1 = m_dx[1]/4;
1021 const bool addEM_S=!d.isEmpty();
1022 const bool addEM_F=!y.isEmpty();
1023 const int NE0 = m_NE[0];
1024 const int NE1 = m_NE[1];
1025 rhs.requireWrite();
1026
1027 #pragma omp parallel
1028 {
1029 if (domain->m_faceOffset[0] > -1) {
1030 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1031 #pragma omp for
1032 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1033 vector<double> EM_S(4*4, 0);
1034 vector<double> EM_F(4, 0);
1035 ///////////////
1036 // process d //
1037 ///////////////
1038 if (addEM_S) {
1039 const double* d_p=d.getSampleDataRO(k1);
1040 EM_S[INDEX2(0,0,4)]+=d_p[0]*w1;
1041 EM_S[INDEX2(2,0,4)]+=d_p[0]*w1;
1042 EM_S[INDEX2(0,2,4)]+=d_p[0]*w1;
1043 EM_S[INDEX2(2,2,4)]+=d_p[0]*w1;
1044 }
1045 ///////////////
1046 // process y //
1047 ///////////////
1048 if (addEM_F) {
1049 const double* y_p=y.getSampleDataRO(k1);
1050 EM_F[0]+=2*w1*y_p[0];
1051 EM_F[2]+=2*w1*y_p[0];
1052 }
1053 const index_t firstNode=m_NN[0]*k1;
1054 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1055 }
1056 } // end colouring
1057 }
1058
1059 if (domain->m_faceOffset[1] > -1) {
1060 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1061 #pragma omp for
1062 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1063 vector<double> EM_S(4*4, 0);
1064 vector<double> EM_F(4, 0);
1065 const index_t e = domain->m_faceOffset[1]+k1;
1066 ///////////////
1067 // process d //
1068 ///////////////
1069 if (addEM_S) {
1070 const double* d_p=d.getSampleDataRO(e);
1071 EM_S[INDEX2(1,1,4)]+=d_p[0]*w1;
1072 EM_S[INDEX2(3,1,4)]+=d_p[0]*w1;
1073 EM_S[INDEX2(1,3,4)]+=d_p[0]*w1;
1074 EM_S[INDEX2(3,3,4)]+=d_p[0]*w1;
1075 }
1076 ///////////////
1077 // process y //
1078 ///////////////
1079 if (addEM_F) {
1080 const double* y_p=y.getSampleDataRO(e);
1081 EM_F[1]+=2*w1*y_p[0];
1082 EM_F[3]+=2*w1*y_p[0];
1083 }
1084 const index_t firstNode=m_NN[0]*(k1+1)-2;
1085 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1086 }
1087 } // end colouring
1088 }
1089
1090 if (domain->m_faceOffset[2] > -1) {
1091 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1092 #pragma omp for
1093 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1094 vector<double> EM_S(4*4, 0);
1095 vector<double> EM_F(4, 0);
1096 const index_t e = domain->m_faceOffset[2]+k0;
1097 ///////////////
1098 // process d //
1099 ///////////////
1100 if (addEM_S) {
1101 const double* d_p=d.getSampleDataRO(e);
1102 EM_S[INDEX2(0,0,4)]+=d_p[0]*w0;
1103 EM_S[INDEX2(1,0,4)]+=d_p[0]*w0;
1104 EM_S[INDEX2(0,1,4)]+=d_p[0]*w0;
1105 EM_S[INDEX2(1,1,4)]+=d_p[0]*w0;
1106 }
1107 ///////////////
1108 // process y //
1109 ///////////////
1110 if (addEM_F) {
1111 const double* y_p=y.getSampleDataRO(e);
1112 EM_F[0]+=2*w0*y_p[0];
1113 EM_F[1]+=2*w0*y_p[0];
1114 }
1115 const index_t firstNode=k0;
1116 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1117 }
1118 } // end colouring
1119 }
1120
1121 if (domain->m_faceOffset[3] > -1) {
1122 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1123 #pragma omp for
1124 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1125 vector<double> EM_S(4*4, 0);
1126 vector<double> EM_F(4, 0);
1127 const index_t e = domain->m_faceOffset[3]+k0;
1128 ///////////////
1129 // process d //
1130 ///////////////
1131 if (addEM_S) {
1132 const double* d_p=d.getSampleDataRO(e);
1133 EM_S[INDEX2(2,2,4)]+=d_p[0]*w0;
1134 EM_S[INDEX2(3,2,4)]+=d_p[0]*w0;
1135 EM_S[INDEX2(2,3,4)]+=d_p[0]*w0;
1136 EM_S[INDEX2(3,3,4)]+=d_p[0]*w0;
1137 }
1138 ///////////////
1139 // process y //
1140 ///////////////
1141 if (addEM_F) {
1142 const double* y_p=y.getSampleDataRO(e);
1143 EM_F[2]+=2*w0*y_p[0];
1144 EM_F[3]+=2*w0*y_p[0];
1145 }
1146 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1147 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1148 }
1149 } // end colouring
1150 }
1151 } // end of parallel section
1152 }
1153
1154 void DefaultAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
1155 Data& rhs, const Data& d, const Data& y) const
1156 {
1157 dim_t numEq, numComp;
1158 if (!mat) {
1159 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
1160 } else {
1161 numEq=mat->getRowBlockSize();
1162 numComp=mat->getColumnBlockSize();
1163 }
1164 const double SQRT3 = 1.73205080756887719318;
1165 const double w5 = m_dx[0]/12;
1166 const double w6 = w5*(SQRT3 + 2);
1167 const double w7 = w5*(-SQRT3 + 2);
1168 const double w8 = w5*(SQRT3 + 3);
1169 const double w9 = w5*(-SQRT3 + 3);
1170 const double w2 = m_dx[1]/12;
1171 const double w0 = w2*(SQRT3 + 2);
1172 const double w1 = w2*(-SQRT3 + 2);
1173 const double w3 = w2*(SQRT3 + 3);
1174 const double w4 = w2*(-SQRT3 + 3);
1175 const bool addEM_S=!d.isEmpty();
1176 const bool addEM_F=!y.isEmpty();
1177 const int NE0 = m_NE[0];
1178 const int NE1 = m_NE[1];
1179 rhs.requireWrite();
1180
1181 #pragma omp parallel
1182 {
1183 if (domain->m_faceOffset[0] > -1) {
1184 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1185 #pragma omp for
1186 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1187 vector<double> EM_S(4*4*numEq*numComp, 0);
1188 vector<double> EM_F(4*numEq, 0);
1189 const index_t e = k1;
1190 ///////////////
1191 // process d //
1192 ///////////////
1193 if (addEM_S) {
1194 const double* d_p=d.getSampleDataRO(e);
1195 if (d.actsExpanded()) {
1196 for (index_t k=0; k<numEq; k++) {
1197 for (index_t m=0; m<numComp; m++) {
1198 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1199 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1200 const double tmp0 = w2*(d_0 + d_1);
1201 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
1202 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0;
1203 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0;
1204 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
1205 }
1206 }
1207 } else { // constant data
1208 for (index_t k=0; k<numEq; k++) {
1209 for (index_t m=0; m<numComp; m++) {
1210 const double d_0 = d_p[INDEX2(k, m, numEq)];
1211 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w2;
1212 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=2*d_0*w2;
1213 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=2*d_0*w2;
1214 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w2;
1215 }
1216 }
1217 }
1218 }
1219 ///////////////
1220 // process y //
1221 ///////////////
1222 if (addEM_F) {
1223 const double* y_p=y.getSampleDataRO(e);
1224 if (y.actsExpanded()) {
1225 for (index_t k=0; k<numEq; k++) {
1226 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1227 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1228 EM_F[INDEX2(k,0,numEq)]+=w3*y_0 + w4*y_1;
1229 EM_F[INDEX2(k,2,numEq)]+=w3*y_1 + w4*y_0;
1230 }
1231 } else { // constant data
1232 for (index_t k=0; k<numEq; k++) {
1233 EM_F[INDEX2(k,0,numEq)]+=6*w2*y_p[k];
1234 EM_F[INDEX2(k,2,numEq)]+=6*w2*y_p[k];
1235 }
1236 }
1237 }
1238 const index_t firstNode=m_NN[0]*k1;
1239 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1240 firstNode, numEq, numComp);
1241 }
1242 } // end colouring
1243 }
1244
1245 if (domain->m_faceOffset[1] > -1) {
1246 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1247 #pragma omp for
1248 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1249 vector<double> EM_S(4*4*numEq*numComp, 0);
1250 vector<double> EM_F(4*numEq, 0);
1251 const index_t e = domain->m_faceOffset[1]+k1;
1252 ///////////////
1253 // process d //
1254 ///////////////
1255 if (addEM_S) {
1256 const double* d_p=d.getSampleDataRO(e);
1257 if (d.actsExpanded()) {
1258 for (index_t k=0; k<numEq; k++) {
1259 for (index_t m=0; m<numComp; m++) {
1260 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1261 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1262 const double tmp0 = w2*(d_0 + d_1);
1263 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
1264 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0;
1265 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0;
1266 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
1267 }
1268 }
1269 } else { // constant data
1270 for (index_t k=0; k<numEq; k++) {
1271 for (index_t m=0; m<numComp; m++) {
1272 const double d_0 = d_p[INDEX2(k, m, numEq)];
1273 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w2;
1274 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=2*d_0*w2;
1275 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=2*d_0*w2;
1276 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w2;
1277 }
1278 }
1279 }
1280 }
1281 ///////////////
1282 // process y //
1283 ///////////////
1284 if (addEM_F) {
1285 const double* y_p=y.getSampleDataRO(e);
1286 if (y.actsExpanded()) {
1287 for (index_t k=0; k<numEq; k++) {
1288 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1289 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1290 EM_F[INDEX2(k,1,numEq)]+=w3*y_0 + w4*y_1;
1291 EM_F[INDEX2(k,3,numEq)]+=w3*y_1 + w4*y_0;
1292 }
1293 } else { // constant data
1294 for (index_t k=0; k<numEq; k++) {
1295 EM_F[INDEX2(k,1,numEq)]+=6*w2*y_p[k];
1296 EM_F[INDEX2(k,3,numEq)]+=6*w2*y_p[k];
1297 }
1298 }
1299 }
1300 const index_t firstNode=m_NN[0]*(k1+1)-2;
1301 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1302 firstNode, numEq, numComp);
1303 }
1304 } // end colouring
1305 }
1306
1307 if (domain->m_faceOffset[2] > -1) {
1308 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1309 #pragma omp for
1310 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1311 vector<double> EM_S(4*4*numEq*numComp, 0);
1312 vector<double> EM_F(4*numEq, 0);
1313 const index_t e = domain->m_faceOffset[2]+k0;
1314 ///////////////
1315 // process d //
1316 ///////////////
1317 if (addEM_S) {
1318 const double* d_p=d.getSampleDataRO(e);
1319 if (d.actsExpanded()) {
1320 for (index_t k=0; k<numEq; k++) {
1321 for (index_t m=0; m<numComp; m++) {
1322 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1323 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1324 const double tmp0 = w5*(d_0 + d_1);
1325 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
1326 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0;
1327 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0;
1328 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
1329 }
1330 }
1331 } else { // constant data
1332 for (index_t k=0; k<numEq; k++) {
1333 for (index_t m=0; m<numComp; m++) {
1334 const double d_0 = d_p[INDEX2(k, m, numEq)];
1335 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w5;
1336 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=2*d_0*w5;
1337 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=2*d_0*w5;
1338 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w5;
1339 }
1340 }
1341 }
1342 }
1343 ///////////////
1344 // process y //
1345 ///////////////
1346 if (addEM_F) {
1347 const double* y_p=y.getSampleDataRO(e);
1348 if (y.actsExpanded()) {
1349 for (index_t k=0; k<numEq; k++) {
1350 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1351 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1352 EM_F[INDEX2(k,0,numEq)]+=w8*y_0 + w9*y_1;
1353 EM_F[INDEX2(k,1,numEq)]+=w8*y_1 + w9*y_0;
1354 }
1355 } else { // constant data
1356 for (index_t k=0; k<numEq; k++) {
1357 EM_F[INDEX2(k,0,numEq)]+=6*w5*y_p[k];
1358 EM_F[INDEX2(k,1,numEq)]+=6*w5*y_p[k];
1359 }
1360 }
1361 }
1362 const index_t firstNode=k0;
1363 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1364 firstNode, numEq, numComp);
1365 }
1366 } // end colouring
1367 }
1368
1369 if (domain->m_faceOffset[3] > -1) {
1370 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1371 #pragma omp for
1372 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1373 vector<double> EM_S(4*4*numEq*numComp, 0);
1374 vector<double> EM_F(4*numEq, 0);
1375 const index_t e = domain->m_faceOffset[3]+k0;
1376 ///////////////
1377 // process d //
1378 ///////////////
1379 if (addEM_S) {
1380 const double* d_p=d.getSampleDataRO(e);
1381 if (d.actsExpanded()) {
1382 for (index_t k=0; k<numEq; k++) {
1383 for (index_t m=0; m<numComp; m++) {
1384 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1385 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1386 const double tmp0 = w5*(d_0 + d_1);
1387 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
1388 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0;
1389 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0;
1390 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
1391 }
1392 }
1393 } else { // constant data
1394 for (index_t k=0; k<numEq; k++) {
1395 for (index_t m=0; m<numComp; m++) {
1396 const double d_0 = d_p[INDEX2(k, m, numEq)];
1397 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w5;
1398 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=2*d_0*w5;
1399 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=2*d_0*w5;
1400 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w5;
1401 }
1402 }
1403 }
1404 }
1405 ///////////////
1406 // process y //
1407 ///////////////
1408 if (addEM_F) {
1409 const double* y_p=y.getSampleDataRO(e);
1410 if (y.actsExpanded()) {
1411 for (index_t k=0; k<numEq; k++) {
1412 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1413 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1414 EM_F[INDEX2(k,2,numEq)]+=w8*y_0 + w9*y_1;
1415 EM_F[INDEX2(k,3,numEq)]+=w8*y_1 + w9*y_0;
1416 }
1417 } else { // constant data
1418 for (index_t k=0; k<numEq; k++) {
1419 EM_F[INDEX2(k,2,numEq)]+=6*w5*y_p[k];
1420 EM_F[INDEX2(k,3,numEq)]+=6*w5*y_p[k];
1421 }
1422 }
1423 }
1424 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1425 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1426 firstNode, numEq, numComp);
1427 }
1428 } // end colouring
1429 }
1430 } // end of parallel section
1431 }
1432
1433 //protected
1434 void DefaultAssembler2D::assemblePDEBoundarySystemReduced(
1435 AbstractSystemMatrix* mat, Data& rhs,
1436 const Data& d, const Data& y) const
1437 {
1438 dim_t numEq, numComp;
1439 if (!mat)
1440 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
1441 else {
1442 numEq=mat->getRowBlockSize();
1443 numComp=mat->getColumnBlockSize();
1444 }
1445 const double w0 = m_dx[0]/4;
1446 const double w1 = m_dx[1]/4;
1447 const bool addEM_S=!d.isEmpty();
1448 const bool addEM_F=!y.isEmpty();
1449 const int NE0 = m_NE[0];
1450 const int NE1 = m_NE[1];
1451 rhs.requireWrite();
1452
1453 #pragma omp parallel
1454 {
1455 if (domain->m_faceOffset[0] > -1) {
1456 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1457 #pragma omp for
1458 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1459 vector<double> EM_S(4*4*numEq*numComp, 0);
1460 vector<double> EM_F(4*numEq, 0);
1461 ///////////////
1462 // process d //
1463 ///////////////
1464 if (addEM_S) {
1465 const double* d_p=d.getSampleDataRO(k1);
1466 for (index_t k=0; k<numEq; k++) {
1467 for (index_t m=0; m<numComp; m++) {
1468 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
1469 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0;
1470 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0;
1471 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0;
1472 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0;
1473 }
1474 }
1475 }
1476 ///////////////
1477 // process y //
1478 ///////////////
1479 if (addEM_F) {
1480 const double* y_p=y.getSampleDataRO(k1);
1481 for (index_t k=0; k<numEq; k++) {
1482 EM_F[INDEX2(k,0,numEq)]+=2*w1*y_p[k];
1483 EM_F[INDEX2(k,2,numEq)]+=2*w1*y_p[k];
1484 }
1485 }
1486 const index_t firstNode=m_NN[0]*k1;
1487 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1488 firstNode, numEq, numComp);
1489 }
1490 } // end colouring
1491 }
1492
1493 if (domain->m_faceOffset[1] > -1) {
1494 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1495 #pragma omp for
1496 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1497 vector<double> EM_S(4*4*numEq*numComp, 0);
1498 vector<double> EM_F(4*numEq, 0);
1499 const index_t e = domain->m_faceOffset[1]+k1;
1500 ///////////////
1501 // process d //
1502 ///////////////
1503 if (addEM_S) {
1504 const double* d_p=d.getSampleDataRO(e);
1505 for (index_t k=0; k<numEq; k++) {
1506 for (index_t m=0; m<numComp; m++) {
1507 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
1508 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0;
1509 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0;
1510 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0;
1511 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0;
1512 }
1513 }
1514 }
1515 ///////////////
1516 // process y //
1517 ///////////////
1518 if (addEM_F) {
1519 const double* y_p=y.getSampleDataRO(e);
1520 for (index_t k=0; k<numEq; k++) {
1521 EM_F[INDEX2(k,1,numEq)]+=2*w1*y_p[k];
1522 EM_F[INDEX2(k,3,numEq)]+=2*w1*y_p[k];
1523 }
1524 }
1525 const index_t firstNode=m_NN[0]*(k1+1)-2;
1526 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1527 firstNode, numEq, numComp);
1528 }
1529 } // end colouring
1530 }
1531
1532 if (domain->m_faceOffset[2] > -1) {
1533 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1534 #pragma omp for
1535 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1536 vector<double> EM_S(4*4*numEq*numComp, 0);
1537 vector<double> EM_F(4*numEq, 0);
1538 const index_t e = domain->m_faceOffset[2]+k0;
1539 ///////////////
1540 // process d //
1541 ///////////////
1542 if (addEM_S) {
1543 const double* d_p=d.getSampleDataRO(e);
1544 for (index_t k=0; k<numEq; k++) {
1545 for (index_t m=0; m<numComp; m++) {
1546 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
1547 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0;
1548 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0;
1549 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0;
1550 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0;
1551 }
1552 }
1553 }
1554 ///////////////
1555 // process y //
1556 ///////////////
1557 if (addEM_F) {
1558 const double* y_p=y.getSampleDataRO(e);
1559 for (index_t k=0; k<numEq; k++) {
1560 EM_F[INDEX2(k,0,numEq)]+=2*w0*y_p[k];
1561 EM_F[INDEX2(k,1,numEq)]+=2*w0*y_p[k];
1562 }
1563 }
1564 const index_t firstNode=k0;
1565 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1566 firstNode, numEq, numComp);
1567 }
1568 } // end colouring
1569 }
1570
1571 if (domain->m_faceOffset[3] > -1) {
1572 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1573 #pragma omp for
1574 for (index_t k0 = k0_0; k0 < NE0; k0+=2) {
1575 vector<double> EM_S(4*4*numEq*numComp, 0);
1576 vector<double> EM_F(4*numEq, 0);
1577 const index_t e = domain->m_faceOffset[3]+k0;
1578 ///////////////
1579 // process d //
1580 ///////////////
1581 if (addEM_S) {
1582 const double* d_p=d.getSampleDataRO(e);
1583 for (index_t k=0; k<numEq; k++) {
1584 for (index_t m=0; m<numComp; m++) {
1585 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
1586 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0;
1587 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0;
1588 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0;
1589 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0;
1590 }
1591 }
1592 }
1593 ///////////////
1594 // process y //
1595 ///////////////
1596 if (addEM_F) {
1597 const double* y_p=y.getSampleDataRO(e);
1598 for (index_t k=0; k<numEq; k++) {
1599 EM_F[INDEX2(k,2,numEq)]+=2*w0*y_p[k];
1600 EM_F[INDEX2(k,3,numEq)]+=2*w0*y_p[k];
1601 }
1602 }
1603 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1604 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1605 firstNode, numEq, numComp);
1606 }
1607 } // end colouring
1608 }
1609 } // end of parallel section
1610 }
1611
1612 //protected
1613 void DefaultAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat,
1614 Data& rhs, const Data& A, const Data& B,
1615 const Data& C, const Data& D,
1616 const Data& X, const Data& Y) const
1617 {
1618 const double SQRT3 = 1.73205080756887719318;
1619 const double w1 = 1.0/24.0;
1620 const double w5 = -SQRT3/24 + 1.0/12;
1621 const double w2 = -SQRT3/24 - 1.0/12;
1622 const double w19 = -m_dx[0]/12;
1623 const double w11 = w19*(SQRT3 + 3)/12;
1624 const double w14 = w19*(-SQRT3 + 3)/12;
1625 const double w16 = w19*(5*SQRT3 + 9)/12;
1626 const double w17 = w19*(-5*SQRT3 + 9)/12;
1627 const double w27 = w19*(-SQRT3 - 3)/2;
1628 const double w28 = w19*(SQRT3 - 3)/2;
1629 const double w18 = -m_dx[1]/12;
1630 const double w12 = w18*(5*SQRT3 + 9)/12;
1631 const double w13 = w18*(-5*SQRT3 + 9)/12;
1632 const double w10 = w18*(SQRT3 + 3)/12;
1633 const double w15 = w18*(-SQRT3 + 3)/12;
1634 const double w25 = w18*(-SQRT3 - 3)/2;
1635 const double w26 = w18*(SQRT3 - 3)/2;
1636 const double w22 = m_dx[0]*m_dx[1]/144;
1637 const double w20 = w22*(SQRT3 + 2);
1638 const double w21 = w22*(-SQRT3 + 2);
1639 const double w23 = w22*(4*SQRT3 + 7);
1640 const double w24 = w22*(-4*SQRT3 + 7);
1641 const double w3 = m_dx[0]/(24*m_dx[1]);
1642 const double w7 = w3*(SQRT3 + 2);
1643 const double w8 = w3*(-SQRT3 + 2);
1644 const double w6 = -m_dx[1]/(24*m_dx[0]);
1645 const double w0 = w6*(SQRT3 + 2);
1646 const double w4 = w6*(-SQRT3 + 2);
1647 const int NE0 = m_NE[0];
1648 const int NE1 = m_NE[1];
1649 rhs.requireWrite();
1650
1651 #pragma omp parallel
1652 {
1653 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1654 #pragma omp for
1655 for (index_t k1=k1_0; k1<NE1; k1+=2) {
1656 for (index_t k0=0; k0<NE0; ++k0) {
1657 bool addEM_S=false;
1658 bool addEM_F=false;
1659 vector<double> EM_S(4*4, 0);
1660 vector<double> EM_F(4, 0);
1661 const index_t e = k0 + NE0*k1;
1662 ///////////////
1663 // process A //
1664 ///////////////
1665 if (!A.isEmpty()) {
1666 addEM_S = true;
1667 const double* A_p = A.getSampleDataRO(e);
1668 if (A.actsExpanded()) {
1669 const double A_00_0 = A_p[INDEX3(0,0,0,2,2)];
1670 const double A_01_0 = A_p[INDEX3(0,1,0,2,2)];
1671 const double A_10_0 = A_p[INDEX3(1,0,0,2,2)];
1672 const double A_11_0 = A_p[INDEX3(1,1,0,2,2)];
1673 const double A_00_1 = A_p[INDEX3(0,0,1,2,2)];
1674 const double A_01_1 = A_p[INDEX3(0,1,1,2,2)];
1675 const double A_10_1 = A_p[INDEX3(1,0,1,2,2)];
1676 const double A_11_1 = A_p[INDEX3(1,1,1,2,2)];
1677 const double A_00_2 = A_p[INDEX3(0,0,2,2,2)];
1678 const double A_01_2 = A_p[INDEX3(0,1,2,2,2)];
1679 const double A_10_2 = A_p[INDEX3(1,0,2,2,2)];
1680 const double A_11_2 = A_p[INDEX3(1,1,2,2,2)];
1681 const double A_00_3 = A_p[INDEX3(0,0,3,2,2)];
1682 const double A_01_3 = A_p[INDEX3(0,1,3,2,2)];
1683 const double A_10_3 = A_p[INDEX3(1,0,3,2,2)];
1684 const double A_11_3 = A_p[INDEX3(1,1,3,2,2)];
1685 const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
1686 const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
1687 const double tmp2 = w4*(A_00_2 + A_00_3);
1688 const double tmp3 = w0*(A_00_0 + A_00_1);
1689 const double tmp4 = w5*(A_01_2 - A_10_3);
1690 const double tmp5 = w2*(-A_01_1 + A_10_0);
1691 const double tmp6 = w5*(A_01_3 + A_10_0);
1692 const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
1693 const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
1694 const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
1695 const double tmp10 = w2*(-A_01_0 - A_10_3);
1696 const double tmp11 = w4*(A_00_0 + A_00_1);
1697 const double tmp12 = w0*(A_00_2 + A_00_3);
1698 const double tmp13 = w5*(A_01_1 - A_10_0);
1699 const double tmp14 = w2*(-A_01_2 + A_10_3);
1700 const double tmp15 = w7*(A_11_0 + A_11_2);
1701 const double tmp16 = w4*(-A_00_2 - A_00_3);
1702 const double tmp17 = w0*(-A_00_0 - A_00_1);
1703 const double tmp18 = w5*(A_01_3 + A_10_3);
1704 const double tmp19 = w8*(A_11_1 + A_11_3);
1705 const double tmp20 = w2*(-A_01_0 - A_10_0);
1706 const double tmp21 = w7*(A_11_1 + A_11_3);
1707 const double tmp22 = w4*(-A_00_0 - A_00_1);
1708 const double tmp23 = w0*(-A_00_2 - A_00_3);
1709 const double tmp24 = w5*(A_01_0 + A_10_0);
1710 const double tmp25 = w8*(A_11_0 + A_11_2);
1711 const double tmp26 = w2*(-A_01_3 - A_10_3);
1712 const double tmp27 = w5*(-A_01_1 - A_10_2);
1713 const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
1714 const double tmp29 = w2*(A_01_2 + A_10_1);
1715 const double tmp30 = w7*(-A_11_1 - A_11_3);
1716 const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
1717 const double tmp32 = w5*(-A_01_0 + A_10_2);
1718 const double tmp33 = w8*(-A_11_0 - A_11_2);
1719 const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
1720 const double tmp35 = w2*(A_01_3 - A_10_1);
1721 const double tmp36 = w5*(A_01_0 + A_10_3);
1722 const double tmp37 = w2*(-A_01_3 - A_10_0);
1723 const double tmp38 = w7*(-A_11_0 - A_11_2);
1724 const double tmp39 = w5*(-A_01_3 + A_10_1);
1725 const double tmp40 = w8*(-A_11_1 - A_11_3);
1726 const double tmp41 = w2*(A_01_0 - A_10_2);
1727 const double tmp42 = w5*(A_01_1 - A_10_3);
1728 const double tmp43 = w2*(-A_01_2 + A_10_0);
1729 const double tmp44 = w5*(A_01_2 - A_10_0);
1730 const double tmp45 = w2*(-A_01_1 + A_10_3);
1731 const double tmp46 = w5*(-A_01_0 + A_10_1);
1732 const double tmp47 = w2*(A_01_3 - A_10_2);
1733 const double tmp48 = w5*(-A_01_1 - A_10_1);
1734 const double tmp49 = w2*(A_01_2 + A_10_2);
1735 const double tmp50 = w5*(-A_01_3 + A_10_2);
1736 const double tmp51 = w2*(A_01_0 - A_10_1);
1737 const double tmp52 = w5*(-A_01_2 - A_10_1);
1738 const double tmp53 = w2*(A_01_1 + A_10_2);
1739 const double tmp54 = w5*(-A_01_2 - A_10_2);
1740 const double tmp55 = w2*(A_01_1 + A_10_1);
1741 EM_S[INDEX2(0,0,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
1742 EM_S[INDEX2(0,1,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
1743 EM_S[INDEX2(0,2,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
1744 EM_S[INDEX2(0,3,4)]+=tmp28 + tmp52 + tmp53 + tmp7 + tmp8;
1745 EM_S[INDEX2(1,0,4)]+=tmp0 + tmp2 + tmp3 + tmp31 + tmp50 + tmp51;
1746 EM_S[INDEX2(1,1,4)]+=tmp16 + tmp17 + tmp21 + tmp25 + tmp28 + tmp54 + tmp55;
1747 EM_S[INDEX2(1,2,4)]+=tmp10 + tmp6 + tmp7 + tmp8 + tmp9;
1748 EM_S[INDEX2(1,3,4)]+=tmp1 + tmp30 + tmp33 + tmp34 + tmp44 + tmp45;
1749 EM_S[INDEX2(2,0,4)]+=tmp1 + tmp34 + tmp38 + tmp40 + tmp42 + tmp43;
1750 EM_S[INDEX2(2,1,4)]+=tmp36 + tmp37 + tmp7 + tmp8 + tmp9;
1751 EM_S[INDEX2(2,2,4)]+=tmp15 + tmp19 + tmp22 + tmp23 + tmp28 + tmp48 + tmp49;
1752 EM_S[INDEX2(2,3,4)]+=tmp0 + tmp11 + tmp12 + tmp31 + tmp46 + tmp47;
1753 EM_S[INDEX2(3,0,4)]+=tmp27 + tmp28 + tmp29 + tmp7 + tmp8;
1754 EM_S[INDEX2(3,1,4)]+=tmp30 + tmp31 + tmp32 + tmp33 + tmp34 + tmp35;
1755 EM_S[INDEX2(3,2,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
1756 EM_S[INDEX2(3,3,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
1757 } else { // constant data
1758 const double A_00 = A_p[INDEX2(0,0,2)];
1759 const double A_01 = A_p[INDEX2(0,1,2)];
1760 const double A_10 = A_p[INDEX2(1,0,2)];
1761 const double A_11 = A_p[INDEX2(1,1,2)];
1762 const double tmp0 = 6*w1*(A_01 - A_10);
1763 const double tmp1 = 6*w1*(A_01 + A_10);
1764 const double tmp2 = 6*w1*(-A_01 - A_10);
1765 const double tmp3 = 6*w1*(-A_01 + A_10);
1766 EM_S[INDEX2(0,0,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
1767 EM_S[INDEX2(0,1,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
1768 EM_S[INDEX2(0,2,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
1769 EM_S[INDEX2(0,3,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
1770 EM_S[INDEX2(1,0,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
1771 EM_S[INDEX2(1,1,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
1772 EM_S[INDEX2(1,2,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
1773 EM_S[INDEX2(1,3,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
1774 EM_S[INDEX2(2,0,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
1775 EM_S[INDEX2(2,1,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
1776 EM_S[INDEX2(2,2,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
1777 EM_S[INDEX2(2,3,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
1778 EM_S[INDEX2(3,0,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
1779 EM_S[INDEX2(3,1,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
1780 EM_S[INDEX2(3,2,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
1781 EM_S[INDEX2(3,3,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
1782 }
1783 }
1784 ///////////////
1785 // process B //
1786 ///////////////
1787 if (!B.isEmpty()) {
1788 addEM_S=true;
1789 const double* B_p=B.getSampleDataRO(e);
1790 if (B.actsExpanded()) {
1791 const double B_0_0 = B_p[INDEX2(0,0,2)];
1792 const double B_1_0 = B_p[INDEX2(1,0,2)];
1793 const double B_0_1 = B_p[INDEX2(0,1,2)];
1794 const double B_1_1 = B_p[INDEX2(1,1,2)];
1795 const double B_0_2 = B_p[INDEX2(0,2,2)];
1796 const double B_1_2 = B_p[INDEX2(1,2,2)];
1797 const double B_0_3 = B_p[INDEX2(0,3,2)];
1798 const double B_1_3 = B_p[INDEX2(1,3,2)];
1799 const double tmp0 = w11*(B_1_0 + B_1_1);
1800 const double tmp1 = w14*(B_1_2 + B_1_3);
1801 const double tmp2 = w15*(-B_0_1 - B_0_3);
1802