/[escript]/trunk/ripley/src/DefaultAssembler2D.cpp
ViewVC logotype

Contents of /trunk/ripley/src/DefaultAssembler2D.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4657 - (show annotations)
Thu Feb 6 06:12:20 2014 UTC (5 years, 7 months ago) by jfenwick
File size: 125807 byte(s)
I changed some files.
Updated copyright notices, added GeoComp.



1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 #include <ripley/DefaultAssembler2D.h>
17
18 using namespace std;
19
20 namespace ripley {
21
22
23 void DefaultAssembler2D::assemblePDESingle(Paso_SystemMatrix* mat,
24 escript::Data& rhs, map<string, escript::Data> coefs) const
25 {
26 escript::Data A = unpackData("A", coefs), B = unpackData("B", coefs),
27 C = unpackData("C", coefs), D = unpackData("D", coefs),
28 X = unpackData("X", coefs), Y = unpackData("Y", coefs);
29 assemblePDESingle(mat, rhs, A, B, C, D, X, Y);
30
31 }
32
33 void DefaultAssembler2D::assemblePDEBoundarySingle(Paso_SystemMatrix* mat,
34 escript::Data& rhs, map<string, escript::Data> coefs) const
35 {
36 escript::Data d = unpackData("d", coefs), y = unpackData("y", coefs);
37 assemblePDEBoundarySingle(mat, rhs, d, y);
38 }
39
40 void DefaultAssembler2D::assemblePDESingleReduced(Paso_SystemMatrix* mat,
41 escript::Data& rhs, map<string, escript::Data> coefs) const
42 {
43 escript::Data A = unpackData("A", coefs), B = unpackData("B", coefs),
44 C = unpackData("C", coefs), D = unpackData("D", coefs),
45 X = unpackData("X", coefs), Y = unpackData("Y", coefs);
46 assemblePDESingleReduced(mat, rhs, A, B, C, D, X, Y);
47 }
48
49 void DefaultAssembler2D::assemblePDEBoundarySingleReduced(Paso_SystemMatrix* mat,
50 escript::Data& rhs, map<string, escript::Data> coefs) const
51 {
52 escript::Data d = unpackData("d", coefs), y = unpackData("y", coefs);
53 assemblePDEBoundarySingleReduced(mat, rhs, d, y);
54 }
55
56 void DefaultAssembler2D::assemblePDESystem(Paso_SystemMatrix* mat,
57 escript::Data& rhs, map<string, escript::Data> coefs) const
58 {
59 escript::Data A = unpackData("A", coefs), B = unpackData("B", coefs),
60 C = unpackData("C", coefs), D = unpackData("D", coefs),
61 X = unpackData("X", coefs), Y = unpackData("Y", coefs);
62 assemblePDESystem(mat, rhs, A, B, C, D, X, Y);
63 }
64
65 void DefaultAssembler2D::assemblePDEBoundarySystem(Paso_SystemMatrix* mat,
66 escript::Data& rhs, map<string, escript::Data> coefs) const
67 {
68 escript::Data d = unpackData("d", coefs), y = unpackData("y", coefs);
69 assemblePDEBoundarySystem(mat, rhs, d, y);
70 }
71
72 void DefaultAssembler2D::assemblePDESystemReduced(Paso_SystemMatrix* mat,
73 escript::Data& rhs, map<string, escript::Data> coefs) const
74 {
75 escript::Data A = unpackData("A", coefs), B = unpackData("B", coefs),
76 C = unpackData("C", coefs), D = unpackData("D", coefs),
77 X = unpackData("X", coefs), Y = unpackData("Y", coefs);
78 assemblePDESystemReduced(mat, rhs, A, B, C, D, X, Y);
79 }
80
81 void DefaultAssembler2D::assemblePDEBoundarySystemReduced(Paso_SystemMatrix* mat,
82 escript::Data& rhs, map<string, escript::Data> coefs) const
83 {
84 escript::Data d = unpackData("d", coefs), y = unpackData("y", coefs);
85 assemblePDEBoundarySystemReduced(mat, rhs, d, y);
86 }
87
88 void DefaultAssembler2D::assemblePDESystem(Paso_SystemMatrix* mat,
89 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
90 const escript::Data& C, const escript::Data& D,
91 const escript::Data& X, const escript::Data& Y) const
92 {
93 dim_t numEq, numComp;
94 if (!mat)
95 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
96 else {
97 numEq=mat->logical_row_block_size;
98 numComp=mat->logical_col_block_size;
99 }
100 const double SQRT3 = 1.73205080756887719318;
101 const double w1 = 1.0/24;
102 const double w5 = -SQRT3/24 + 1.0/12;
103 const double w2 = -SQRT3/24 - 1.0/12;
104 const double w19 = -m_dx[0]/12;
105 const double w11 = w19*(SQRT3 + 3)/12;
106 const double w14 = w19*(-SQRT3 + 3)/12;
107 const double w16 = w19*(5*SQRT3 + 9)/12;
108 const double w17 = w19*(-5*SQRT3 + 9)/12;
109 const double w27 = w19*(-SQRT3 - 3)/2;
110 const double w28 = w19*(SQRT3 - 3)/2;
111 const double w18 = -m_dx[1]/12;
112 const double w10 = w18*(SQRT3 + 3)/12;
113 const double w15 = w18*(-SQRT3 + 3)/12;
114 const double w12 = w18*(5*SQRT3 + 9)/12;
115 const double w13 = w18*(-5*SQRT3 + 9)/12;
116 const double w25 = w18*(-SQRT3 - 3)/2;
117 const double w26 = w18*(SQRT3 - 3)/2;
118 const double w22 = m_dx[0]*m_dx[1]/144;
119 const double w20 = w22*(SQRT3 + 2);
120 const double w21 = w22*(-SQRT3 + 2);
121 const double w23 = w22*(4*SQRT3 + 7);
122 const double w24 = w22*(-4*SQRT3 + 7);
123 const double w3 = m_dx[0]/(24*m_dx[1]);
124 const double w7 = w3*(SQRT3 + 2);
125 const double w8 = w3*(-SQRT3 + 2);
126 const double w6 = -m_dx[1]/(24*m_dx[0]);
127 const double w0 = w6*(SQRT3 + 2);
128 const double w4 = w6*(-SQRT3 + 2);
129
130 rhs.requireWrite();
131 #pragma omp parallel
132 {
133 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
134 #pragma omp for
135 for (index_t k1=k1_0; k1 < m_NE[1]; k1+=2) {
136 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
137 bool addEM_S=false;
138 bool addEM_F=false;
139 vector<double> EM_S(4*4*numEq*numComp, 0);
140 vector<double> EM_F(4*numEq, 0);
141 const index_t e = k0 + m_NE[0]*k1;
142 ///////////////
143 // process A //
144 ///////////////
145 if (!A.isEmpty()) {
146 addEM_S = true;
147 const double* A_p = A.getSampleDataRO(e);
148 if (A.actsExpanded()) {
149 for (index_t k=0; k<numEq; k++) {
150 for (index_t m=0; m<numComp; m++) {
151 const double A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,2,numComp,2)];
152 const double A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,2,numComp,2)];
153 const double A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,2,numComp,2)];
154 const double A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,2,numComp,2)];
155 const double A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,2,numComp,2)];
156 const double A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,2,numComp,2)];
157 const double A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,2,numComp,2)];
158 const double A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,2,numComp,2)];
159 const double A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,2,numComp,2)];
160 const double A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,2,numComp,2)];
161 const double A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,2,numComp,2)];
162 const double A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,2,numComp,2)];
163 const double A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,2,numComp,2)];
164 const double A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,2,numComp,2)];
165 const double A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,2,numComp,2)];
166 const double A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,2,numComp,2)];
167 const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
168 const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
169 const double tmp2 = w4*(A_00_2 + A_00_3);
170 const double tmp3 = w0*(A_00_0 + A_00_1);
171 const double tmp4 = w5*(A_01_2 - A_10_3);
172 const double tmp5 = w2*(-A_01_1 + A_10_0);
173 const double tmp6 = w5*(A_01_3 + A_10_0);
174 const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
175 const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
176 const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
177 const double tmp10 = w2*(-A_01_0 - A_10_3);
178 const double tmp11 = w4*(A_00_0 + A_00_1);
179 const double tmp12 = w0*(A_00_2 + A_00_3);
180 const double tmp13 = w5*(A_01_1 - A_10_0);
181 const double tmp14 = w2*(-A_01_2 + A_10_3);
182 const double tmp15 = w7*(A_11_0 + A_11_2);
183 const double tmp16 = w4*(-A_00_2 - A_00_3);
184 const double tmp17 = w0*(-A_00_0 - A_00_1);
185 const double tmp18 = w5*(A_01_3 + A_10_3);
186 const double tmp19 = w8*(A_11_1 + A_11_3);
187 const double tmp20 = w2*(-A_01_0 - A_10_0);
188 const double tmp21 = w7*(A_11_1 + A_11_3);
189 const double tmp22 = w4*(-A_00_0 - A_00_1);
190 const double tmp23 = w0*(-A_00_2 - A_00_3);
191 const double tmp24 = w5*(A_01_0 + A_10_0);
192 const double tmp25 = w8*(A_11_0 + A_11_2);
193 const double tmp26 = w2*(-A_01_3 - A_10_3);
194 const double tmp27 = w5*(-A_01_1 - A_10_2);
195 const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
196 const double tmp29 = w2*(A_01_2 + A_10_1);
197 const double tmp30 = w7*(-A_11_1 - A_11_3);
198 const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
199 const double tmp32 = w5*(-A_01_0 + A_10_2);
200 const double tmp33 = w8*(-A_11_0 - A_11_2);
201 const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
202 const double tmp35 = w2*(A_01_3 - A_10_1);
203 const double tmp36 = w5*(A_01_0 + A_10_3);
204 const double tmp37 = w2*(-A_01_3 - A_10_0);
205 const double tmp38 = w7*(-A_11_0 - A_11_2);
206 const double tmp39 = w5*(-A_01_3 + A_10_1);
207 const double tmp40 = w8*(-A_11_1 - A_11_3);
208 const double tmp41 = w2*(A_01_0 - A_10_2);
209 const double tmp42 = w5*(A_01_1 - A_10_3);
210 const double tmp43 = w2*(-A_01_2 + A_10_0);
211 const double tmp44 = w5*(A_01_2 - A_10_0);
212 const double tmp45 = w2*(-A_01_1 + A_10_3);
213 const double tmp46 = w5*(-A_01_0 + A_10_1);
214 const double tmp47 = w2*(A_01_3 - A_10_2);
215 const double tmp48 = w5*(-A_01_1 - A_10_1);
216 const double tmp49 = w2*(A_01_2 + A_10_2);
217 const double tmp50 = w5*(-A_01_3 + A_10_2);
218 const double tmp51 = w2*(A_01_0 - A_10_1);
219 const double tmp52 = w5*(-A_01_2 - A_10_1);
220 const double tmp53 = w2*(A_01_1 + A_10_2);
221 const double tmp54 = w5*(-A_01_2 - A_10_2);
222 const double tmp55 = w2*(A_01_1 + A_10_1);
223 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
224 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
225 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
226 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp28 + tmp52 + tmp53 + tmp7 + tmp8;
227 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp2 + tmp3 + tmp31 + tmp50 + tmp51;
228 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp16 + tmp17 + tmp21 + tmp25 + tmp28 + tmp54 + tmp55;
229 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10 + tmp6 + tmp7 + tmp8 + tmp9;
230 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp1 + tmp30 + tmp33 + tmp34 + tmp44 + tmp45;
231 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp1 + tmp34 + tmp38 + tmp40 + tmp42 + tmp43;
232 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp36 + tmp37 + tmp7 + tmp8 + tmp9;
233 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp15 + tmp19 + tmp22 + tmp23 + tmp28 + tmp48 + tmp49;
234 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0 + tmp11 + tmp12 + tmp31 + tmp46 + tmp47;
235 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp27 + tmp28 + tmp29 + tmp7 + tmp8;
236 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp30 + tmp31 + tmp32 + tmp33 + tmp34 + tmp35;
237 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
238 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
239 }
240 }
241 } else { // constant data
242 for (index_t k=0; k<numEq; k++) {
243 for (index_t m=0; m<numComp; m++) {
244 const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)];
245 const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)];
246 const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)];
247 const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)];
248 const double tmp0 = 6*w1*(A_01 - A_10);
249 const double tmp1 = 6*w1*(A_01 + A_10);
250 const double tmp2 = 6*w1*(-A_01 - A_10);
251 const double tmp3 = 6*w1*(-A_01 + A_10);
252 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
253 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
254 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
255 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
256 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
257 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
258 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
259 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
260 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
261 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
262 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
263 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
264 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
265 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
266 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
267 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
268 }
269 }
270 }
271 }
272 ///////////////
273 // process B //
274 ///////////////
275 if (!B.isEmpty()) {
276 addEM_S=true;
277 const double* B_p=B.getSampleDataRO(e);
278 if (B.actsExpanded()) {
279 for (index_t k=0; k<numEq; k++) {
280 for (index_t m=0; m<numComp; m++) {
281 const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
282 const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
283 const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
284 const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
285 const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
286 const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
287 const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
288 const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
289 const double tmp0 = w11*(B_1_0 + B_1_1);
290 const double tmp1 = w14*(B_1_2 + B_1_3);
291 const double tmp2 = w15*(-B_0_1 - B_0_3);
292 const double tmp3 = w10*(-B_0_0 - B_0_2);
293 const double tmp4 = w11*(B_1_2 + B_1_3);
294 const double tmp5 = w14*(B_1_0 + B_1_1);
295 const double tmp6 = w11*(-B_1_2 - B_1_3);
296 const double tmp7 = w14*(-B_1_0 - B_1_1);
297 const double tmp8 = w11*(-B_1_0 - B_1_1);
298 const double tmp9 = w14*(-B_1_2 - B_1_3);
299 const double tmp10 = w10*(-B_0_1 - B_0_3);
300 const double tmp11 = w15*(-B_0_0 - B_0_2);
301 const double tmp12 = w15*(B_0_0 + B_0_2);
302 const double tmp13 = w10*(B_0_1 + B_0_3);
303 const double tmp14 = w10*(B_0_0 + B_0_2);
304 const double tmp15 = w15*(B_0_1 + B_0_3);
305 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
306 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
307 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
308 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
309 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-B_0_0*w12 - B_0_1*w10 - B_0_2*w15 - B_0_3*w13 + tmp0 + tmp1;
310 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-B_0_0*w10 - B_0_1*w12 - B_0_2*w13 - B_0_3*w15 + B_1_0*w14 + B_1_1*w16 + B_1_2*w17 + B_1_3*w11;
311 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
312 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=B_1_0*w17 + B_1_1*w11 + B_1_2*w14 + B_1_3*w16 + tmp10 + tmp11;
313 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-B_1_0*w16 - B_1_1*w14 - B_1_2*w11 - B_1_3*w17 + tmp14 + tmp15;
314 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
315 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=B_0_0*w15 + B_0_1*w13 + B_0_2*w12 + B_0_3*w10 - B_1_0*w11 - B_1_1*w17 - B_1_2*w16 - B_1_3*w14;
316 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=B_0_0*w13 + B_0_1*w15 + B_0_2*w10 + B_0_3*w12 + tmp6 + tmp7;
317 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
318 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-B_1_0*w14 - B_1_1*w16 - B_1_2*w17 - B_1_3*w11 + tmp10 + tmp11;
319 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-B_0_0*w15 - B_0_1*w13 - B_0_2*w12 - B_0_3*w10 + tmp6 + tmp7;
320 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-B_0_0*w13 - B_0_1*w15 - B_0_2*w10 - B_0_3*w12 - B_1_0*w17 - B_1_1*w11 - B_1_2*w14 - B_1_3*w16;
321 }
322 }
323 } else { // constant data
324 for (index_t k=0; k<numEq; k++) {
325 for (index_t m=0; m<numComp; m++) {
326 const double wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
327 const double wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
328 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wB0 + 2*wB1;
329 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2*wB0 + wB1;
330 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wB0 + 2*wB1;
331 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wB0 + wB1;
332 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2*wB0 + wB1;
333 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wB0 + 2*wB1;
334 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= -wB0 + wB1;
335 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wB0 + 2*wB1;
336 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wB0 - 2*wB1;
337 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wB0 - wB1;
338 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wB0 - 2*wB1;
339 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2*wB0 - wB1;
340 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= -wB0 - wB1;
341 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wB0 - 2*wB1;
342 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2*wB0 - wB1;
343 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wB0 - 2*wB1;
344 }
345 }
346 }
347 }
348 ///////////////
349 // process C //
350 ///////////////
351 if (!C.isEmpty()) {
352 addEM_S=true;
353 const double* C_p=C.getSampleDataRO(e);
354 if (C.actsExpanded()) {
355 for (index_t k=0; k<numEq; k++) {
356 for (index_t m=0; m<numComp; m++) {
357 const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
358 const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
359 const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
360 const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
361 const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
362 const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
363 const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
364 const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
365 const double tmp0 = w11*(C_1_0 + C_1_1);
366 const double tmp1 = w14*(C_1_2 + C_1_3);
367 const double tmp2 = w15*(C_0_0 + C_0_2);
368 const double tmp3 = w10*(C_0_1 + C_0_3);
369 const double tmp4 = w11*(-C_1_0 - C_1_1);
370 const double tmp5 = w14*(-C_1_2 - C_1_3);
371 const double tmp6 = w11*(-C_1_2 - C_1_3);
372 const double tmp7 = w14*(-C_1_0 - C_1_1);
373 const double tmp8 = w11*(C_1_2 + C_1_3);
374 const double tmp9 = w14*(C_1_0 + C_1_1);
375 const double tmp10 = w10*(-C_0_1 - C_0_3);
376 const double tmp11 = w15*(-C_0_0 - C_0_2);
377 const double tmp12 = w15*(-C_0_1 - C_0_3);
378 const double tmp13 = w10*(-C_0_0 - C_0_2);
379 const double tmp14 = w10*(C_0_0 + C_0_2);
380 const double tmp15 = w15*(C_0_1 + C_0_3);
381 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
382 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
383 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
384 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
385 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=C_0_0*w10 + C_0_1*w12 + C_0_2*w13 + C_0_3*w15 + tmp0 + tmp1;
386 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-C_0_0*w10 - C_0_1*w12 - C_0_2*w13 - C_0_3*w15 + C_1_0*w14 + C_1_1*w16 + C_1_2*w17 + C_1_3*w11;
387 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
388 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-C_1_0*w14 - C_1_1*w16 - C_1_2*w17 - C_1_3*w11 + tmp10 + tmp11;
389 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=C_1_0*w11 + C_1_1*w17 + C_1_2*w16 + C_1_3*w14 + tmp14 + tmp15;
390 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
391 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=C_0_0*w15 + C_0_1*w13 + C_0_2*w12 + C_0_3*w10 - C_1_0*w11 - C_1_1*w17 - C_1_2*w16 - C_1_3*w14;
392 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-C_0_0*w15 - C_0_1*w13 - C_0_2*w12 - C_0_3*w10 + tmp6 + tmp7;
393 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
394 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=C_1_0*w17 + C_1_1*w11 + C_1_2*w14 + C_1_3*w16 + tmp10 + tmp11;
395 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=C_0_0*w13 + C_0_1*w15 + C_0_2*w10 + C_0_3*w12 + tmp6 + tmp7;
396 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-C_0_0*w13 - C_0_1*w15 - C_0_2*w10 - C_0_3*w12 - C_1_0*w17 - C_1_1*w11 - C_1_2*w14 - C_1_3*w16;
397 }
398 }
399 } else { // constant data
400 for (index_t k=0; k<numEq; k++) {
401 for (index_t m=0; m<numComp; m++) {
402 const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
403 const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
404 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wC0 + 2*wC1;
405 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2*wC0 + wC1;
406 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wC0 - 2*wC1;
407 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= -wC0 - wC1;
408 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2*wC0 + wC1;
409 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wC0 + 2*wC1;
410 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wC0 - wC1;
411 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wC0 - 2*wC1;
412 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wC0 + 2*wC1;
413 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= -wC0 + wC1;
414 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wC0 - 2*wC1;
415 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2*wC0 - wC1;
416 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wC0 + wC1;
417 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wC0 + 2*wC1;
418 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2*wC0 - wC1;
419 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wC0 - 2*wC1;
420 }
421 }
422 }
423 }
424 ///////////////
425 // process D //
426 ///////////////
427 if (!D.isEmpty()) {
428 addEM_S=true;
429 const double* D_p=D.getSampleDataRO(e);
430 if (D.actsExpanded()) {
431 for (index_t k=0; k<numEq; k++) {
432 for (index_t m=0; m<numComp; m++) {
433 const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
434 const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
435 const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
436 const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
437 const double tmp0 = w21*(D_2 + D_3);
438 const double tmp1 = w20*(D_0 + D_1);
439 const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
440 const double tmp3 = w21*(D_0 + D_1);
441 const double tmp4 = w20*(D_2 + D_3);
442 const double tmp5 = w22*(D_1 + D_2);
443 const double tmp6 = w21*(D_0 + D_2);
444 const double tmp7 = w20*(D_1 + D_3);
445 const double tmp8 = w21*(D_1 + D_3);
446 const double tmp9 = w20*(D_0 + D_2);
447 const double tmp10 = w22*(D_0 + D_3);
448 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=D_0*w23 + D_3*w24 + tmp5;
449 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1;
450 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp8 + tmp9;
451 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2;
452 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp1;
453 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=D_1*w23 + D_2*w24 + tmp10;
454 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2;
455 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6 + tmp7;
456 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp8 + tmp9;
457 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2;
458 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=D_1*w24 + D_2*w23 + tmp10;
459 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3 + tmp4;
460 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2;
461 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp6 + tmp7;
462 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3 + tmp4;
463 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=D_0*w24 + D_3*w23 + tmp5;
464 }
465 }
466 } else { // constant data
467 for (index_t k=0; k<numEq; k++) {
468 for (index_t m=0; m<numComp; m++) {
469 const double D_0 = D_p[INDEX2(k, m, numEq)];
470 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16*D_0*w22;
471 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*D_0*w22;
472 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=8*D_0*w22;
473 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*D_0*w22;
474 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*D_0*w22;
475 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16*D_0*w22;
476 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*D_0*w22;
477 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=8*D_0*w22;
478 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=8*D_0*w22;
479 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*D_0*w22;
480 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16*D_0*w22;
481 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*D_0*w22;
482 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*D_0*w22;
483 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=8*D_0*w22;
484 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*D_0*w22;
485 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16*D_0*w22;
486 }
487 }
488 }
489 }
490 ///////////////
491 // process X //
492 ///////////////
493 if (!X.isEmpty()) {
494 addEM_F=true;
495 const double* X_p=X.getSampleDataRO(e);
496 if (X.actsExpanded()) {
497 for (index_t k=0; k<numEq; k++) {
498 const double X_0_0 = X_p[INDEX3(k,0,0,numEq,2)];
499 const double X_1_0 = X_p[INDEX3(k,1,0,numEq,2)];
500 const double X_0_1 = X_p[INDEX3(k,0,1,numEq,2)];
501 const double X_1_1 = X_p[INDEX3(k,1,1,numEq,2)];
502 const double X_0_2 = X_p[INDEX3(k,0,2,numEq,2)];
503 const double X_1_2 = X_p[INDEX3(k,1,2,numEq,2)];
504 const double X_0_3 = X_p[INDEX3(k,0,3,numEq,2)];
505 const double X_1_3 = X_p[INDEX3(k,1,3,numEq,2)];
506 const double tmp0 = 6*w15*(X_0_2 + X_0_3);
507 const double tmp1 = 6*w10*(X_0_0 + X_0_1);
508 const double tmp2 = 6*w11*(X_1_0 + X_1_2);
509 const double tmp3 = 6*w14*(X_1_1 + X_1_3);
510 const double tmp4 = 6*w11*(X_1_1 + X_1_3);
511 const double tmp5 = w25*(X_0_0 + X_0_1);
512 const double tmp6 = w26*(X_0_2 + X_0_3);
513 const double tmp7 = 6*w14*(X_1_0 + X_1_2);
514 const double tmp8 = w27*(X_1_0 + X_1_2);
515 const double tmp9 = w28*(X_1_1 + X_1_3);
516 const double tmp10 = w25*(-X_0_2 - X_0_3);
517 const double tmp11 = w26*(-X_0_0 - X_0_1);
518 const double tmp12 = w27*(X_1_1 + X_1_3);
519 const double tmp13 = w28*(X_1_0 + X_1_2);
520 const double tmp14 = w25*(X_0_2 + X_0_3);
521 const double tmp15 = w26*(X_0_0 + X_0_1);
522 EM_F[INDEX2(k,0,numEq)]+=tmp0 + tmp1 + tmp2 + tmp3;
523 EM_F[INDEX2(k,1,numEq)]+=tmp4 + tmp5 + tmp6 + tmp7;
524 EM_F[INDEX2(k,2,numEq)]+=tmp10 + tmp11 + tmp8 + tmp9;
525 EM_F[INDEX2(k,3,numEq)]+=tmp12 + tmp13 + tmp14 + tmp15;
526 }
527 } else { // constant data
528 for (index_t k=0; k<numEq; k++) {
529 const double wX0 = X_p[INDEX2(k, 0, numEq)]*w18;
530 const double wX1 = X_p[INDEX2(k, 1, numEq)]*w19;
531 EM_F[INDEX2(k,0,numEq)]+= 6*wX0 + 6*wX1;
532 EM_F[INDEX2(k,1,numEq)]+=-6*wX0 + 6*wX1;
533 EM_F[INDEX2(k,2,numEq)]+= 6*wX0 - 6*wX1;
534 EM_F[INDEX2(k,3,numEq)]+=-6*wX0 - 6*wX1;
535 }
536 }
537 }
538 ///////////////
539 // process Y //
540 ///////////////
541 if (!Y.isEmpty()) {
542 addEM_F=true;
543 const double* Y_p=Y.getSampleDataRO(e);
544 if (Y.actsExpanded()) {
545 for (index_t k=0; k<numEq; k++) {
546 const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
547 const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
548 const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
549 const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
550 const double tmp0 = 6*w22*(Y_1 + Y_2);
551 const double tmp1 = 6*w22*(Y_0 + Y_3);
552 EM_F[INDEX2(k,0,numEq)]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
553 EM_F[INDEX2(k,1,numEq)]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
554 EM_F[INDEX2(k,2,numEq)]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
555 EM_F[INDEX2(k,3,numEq)]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
556 }
557 } else { // constant data
558 for (index_t k=0; k<numEq; k++) {
559 EM_F[INDEX2(k,0,numEq)]+=36*Y_p[k]*w22;
560 EM_F[INDEX2(k,1,numEq)]+=36*Y_p[k]*w22;
561 EM_F[INDEX2(k,2,numEq)]+=36*Y_p[k]*w22;
562 EM_F[INDEX2(k,3,numEq)]+=36*Y_p[k]*w22;
563 }
564 }
565 }
566
567 // add to matrix (if addEM_S) and RHS (if addEM_F)
568 const index_t firstNode=m_NN[0]*k1+k0;
569 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
570 addEM_F, firstNode, numEq, numComp);
571 } // end k0 loop
572 } // end k1 loop
573 } // end of colouring
574 } // end of parallel region
575 }
576
577 void DefaultAssembler2D::assemblePDESystemReduced(Paso_SystemMatrix* mat,
578 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
579 const escript::Data& C, const escript::Data& D,
580 const escript::Data& X, const escript::Data& Y) const
581 {
582 dim_t numEq, numComp;
583 if (!mat)
584 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
585 else {
586 numEq=mat->logical_row_block_size;
587 numComp=mat->logical_col_block_size;
588 }
589
590 const double w0 = 1./4;
591 const double w1 = m_dx[0]/8;
592 const double w2 = m_dx[1]/8;
593 const double w3 = m_dx[0]*m_dx[1]/16;
594 const double w4 = m_dx[0]/(4*m_dx[1]);
595 const double w5 = m_dx[1]/(4*m_dx[0]);
596
597 rhs.requireWrite();
598 #pragma omp parallel
599 {
600 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
601 #pragma omp for
602 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
603 for (index_t k0=0; k0<m_NE[0]; ++k0) {
604 bool addEM_S=false;
605 bool addEM_F=false;
606 vector<double> EM_S(4*4*numEq*numComp, 0);
607 vector<double> EM_F(4*numEq, 0);
608 const index_t e = k0 + m_NE[0]*k1;
609 ///////////////
610 // process A //
611 ///////////////
612 if (!A.isEmpty()) {
613 addEM_S=true;
614 const double* A_p=A.getSampleDataRO(e);
615 for (index_t k=0; k<numEq; k++) {
616 for (index_t m=0; m<numComp; m++) {
617 const double Aw00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]*w5;
618 const double Aw01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]*w0;
619 const double Aw10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]*w0;
620 const double Aw11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]*w4;
621 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= Aw00 + Aw01 + Aw10 + Aw11;
622 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-Aw00 - Aw01 + Aw10 + Aw11;
623 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= Aw00 + Aw01 - Aw10 - Aw11;
624 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=-Aw00 - Aw01 - Aw10 - Aw11;
625 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-Aw00 + Aw01 - Aw10 + Aw11;
626 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= Aw00 - Aw01 - Aw10 + Aw11;
627 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=-Aw00 + Aw01 + Aw10 - Aw11;
628 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= Aw00 - Aw01 + Aw10 - Aw11;
629 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= Aw00 - Aw01 + Aw10 - Aw11;
630 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=-Aw00 + Aw01 + Aw10 - Aw11;
631 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= Aw00 - Aw01 - Aw10 + Aw11;
632 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-Aw00 + Aw01 - Aw10 + Aw11;
633 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=-Aw00 - Aw01 - Aw10 - Aw11;
634 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= Aw00 + Aw01 - Aw10 - Aw11;
635 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-Aw00 - Aw01 + Aw10 + Aw11;
636 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= Aw00 + Aw01 + Aw10 + Aw11;
637 }
638 }
639 }
640 ///////////////
641 // process B //
642 ///////////////
643 if (!B.isEmpty()) {
644 addEM_S=true;
645 const double* B_p=B.getSampleDataRO(e);
646 for (index_t k=0; k<numEq; k++) {
647 for (index_t m=0; m<numComp; m++) {
648 const double wB0 = B_p[INDEX3(k,0,m, numEq, 2)]*w2;
649 const double wB1 = B_p[INDEX3(k,1,m, numEq, 2)]*w1;
650 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wB0 - wB1;
651 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-wB0 - wB1;
652 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-wB0 - wB1;
653 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=-wB0 - wB1;
654 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= wB0 - wB1;
655 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= wB0 - wB1;
656 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wB0 - wB1;
657 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= wB0 - wB1;
658 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-wB0 + wB1;
659 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=-wB0 + wB1;
660 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-wB0 + wB1;
661 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-wB0 + wB1;
662 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wB0 + wB1;
663 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= wB0 + wB1;
664 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= wB0 + wB1;
665 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= wB0 + wB1;
666 }
667 }
668 }
669 ///////////////
670 // process C //
671 ///////////////
672 if (!C.isEmpty()) {
673 addEM_S=true;
674 const double* C_p=C.getSampleDataRO(e);
675 for (index_t k=0; k<numEq; k++) {
676 for (index_t m=0; m<numComp; m++) {
677 const double wC0 = C_p[INDEX3(k, m, 0, numEq, numComp)]*w2;
678 const double wC1 = C_p[INDEX3(k, m, 1, numEq, numComp)]*w1;
679 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-wC0 - wC1;
680 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-wC0 - wC1;
681 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-wC0 - wC1;
682 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=-wC0 - wC1;
683 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= wC0 - wC1;
684 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+= wC0 - wC1;
685 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wC0 - wC1;
686 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= wC0 - wC1;
687 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-wC0 + wC1;
688 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=-wC0 + wC1;
689 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-wC0 + wC1;
690 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-wC0 + wC1;
691 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wC0 + wC1;
692 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= wC0 + wC1;
693 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= wC0 + wC1;
694 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+= wC0 + wC1;
695 }
696 }
697 }
698 ///////////////
699 // process D //
700 ///////////////
701 if (!D.isEmpty()) {
702 addEM_S=true;
703 const double* D_p=D.getSampleDataRO(e);
704 for (index_t k=0; k<numEq; k++) {
705 for (index_t m=0; m<numComp; m++) {
706 const double wD0 = D_p[INDEX2(k, m, numEq)]*w3;
707 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=wD0;
708 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=wD0;
709 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=wD0;
710 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=wD0;
711 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=wD0;
712 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=wD0;
713 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=wD0;
714 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=wD0;
715 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=wD0;
716 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=wD0;
717 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=wD0;
718 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=wD0;
719 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=wD0;
720 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=wD0;
721 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=wD0;
722 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=wD0;
723 }
724 }
725 }
726 ///////////////
727 // process X //
728 ///////////////
729 if (!X.isEmpty()) {
730 addEM_F=true;
731 const double* X_p=X.getSampleDataRO(e);
732 for (index_t k=0; k<numEq; k++) {
733 const double wX0 = 4*X_p[INDEX2(k, 0, numEq)]*w2;
734 const double wX1 = 4*X_p[INDEX2(k, 1, numEq)]*w1;
735 EM_F[INDEX2(k,0,numEq)]+=-wX0 - wX1;
736 EM_F[INDEX2(k,1,numEq)]+= wX0 - wX1;
737 EM_F[INDEX2(k,2,numEq)]+=-wX0 + wX1;
738 EM_F[INDEX2(k,3,numEq)]+= wX0 + wX1;
739 }
740 }
741 ///////////////
742 // process Y //
743 ///////////////
744 if (!Y.isEmpty()) {
745 addEM_F=true;
746 const double* Y_p=Y.getSampleDataRO(e);
747 for (index_t k=0; k<numEq; k++) {
748 EM_F[INDEX2(k,0,numEq)]+=4*Y_p[k]*w3;
749 EM_F[INDEX2(k,1,numEq)]+=4*Y_p[k]*w3;
750 EM_F[INDEX2(k,2,numEq)]+=4*Y_p[k]*w3;
751 EM_F[INDEX2(k,3,numEq)]+=4*Y_p[k]*w3;
752 }
753 }
754
755 // add to matrix (if addEM_S) and RHS (if addEM_F)
756 const index_t firstNode=m_NN[0]*k1+k0;
757 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
758 firstNode, numEq, numComp);
759 } // end k0 loop
760 } // end k1 loop
761 } // end of colouring
762 } // end of parallel region
763 }
764
765 void DefaultAssembler2D::assemblePDEBoundarySingle(Paso_SystemMatrix* mat,
766 escript::Data& rhs, const escript::Data& d, const escript::Data& y) const
767 {
768 const double SQRT3 = 1.73205080756887719318;
769 const double w5 = m_dx[0]/12;
770 const double w6 = w5*(SQRT3 + 2);
771 const double w7 = w5*(-SQRT3 + 2);
772 const double w8 = w5*(SQRT3 + 3);
773 const double w9 = w5*(-SQRT3 + 3);
774 const double w2 = m_dx[1]/12;
775 const double w0 = w2*(SQRT3 + 2);
776 const double w1 = w2*(-SQRT3 + 2);
777 const double w3 = w2*(SQRT3 + 3);
778 const double w4 = w2*(-SQRT3 + 3);
779 const bool addEM_S=!d.isEmpty();
780 const bool addEM_F=!y.isEmpty();
781 rhs.requireWrite();
782 #pragma omp parallel
783 {
784 if (domain->m_faceOffset[0] > -1) {
785 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
786 #pragma omp for
787 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
788 vector<double> EM_S(4*4, 0);
789 vector<double> EM_F(4, 0);
790 ///////////////
791 // process d //
792 ///////////////
793 if (addEM_S) {
794 const double* d_p=d.getSampleDataRO(k1);
795 if (d.actsExpanded()) {
796 const double d_0 = d_p[0];
797 const double d_1 = d_p[1];
798 const double tmp0 = w2*(d_0 + d_1);
799 EM_S[INDEX2(0,0,4)]+=d_0*w0 + d_1*w1;
800 EM_S[INDEX2(2,0,4)]+=tmp0;
801 EM_S[INDEX2(0,2,4)]+=tmp0;
802 EM_S[INDEX2(2,2,4)]+=d_0*w1 + d_1*w0;
803 } else { // constant data
804 EM_S[INDEX2(0,0,4)]+=4*d_p[0]*w2;
805 EM_S[INDEX2(2,0,4)]+=2*d_p[0]*w2;
806 EM_S[INDEX2(0,2,4)]+=2*d_p[0]*w2;
807 EM_S[INDEX2(2,2,4)]+=4*d_p[0]*w2;
808 }
809 }
810 ///////////////
811 // process y //
812 ///////////////
813 if (addEM_F) {
814 const double* y_p=y.getSampleDataRO(k1);
815 if (y.actsExpanded()) {
816 EM_F[0]+=w3*y_p[0] + w4*y_p[1];
817 EM_F[2]+=w3*y_p[1] + w4*y_p[0];
818 } else { // constant data
819 EM_F[0]+=6*w2*y_p[0];
820 EM_F[2]+=6*w2*y_p[0];
821 }
822 }
823 const index_t firstNode=m_NN[0]*k1;
824 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
825 }
826 } // end colouring
827 }
828
829 if (domain->m_faceOffset[1] > -1) {
830 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
831 #pragma omp for
832 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
833 vector<double> EM_S(4*4, 0);
834 vector<double> EM_F(4, 0);
835 const index_t e = domain->m_faceOffset[1]+k1;
836 ///////////////
837 // process d //
838 ///////////////
839 if (addEM_S) {
840 const double* d_p=d.getSampleDataRO(e);
841 if (d.actsExpanded()) {
842 const double d_0 = d_p[0];
843 const double d_1 = d_p[1];
844 const double tmp0 = w2*(d_0 + d_1);
845 EM_S[INDEX2(1,1,4)]+=d_0*w0 + d_1*w1;
846 EM_S[INDEX2(3,1,4)]+=tmp0;
847 EM_S[INDEX2(1,3,4)]+=tmp0;
848 EM_S[INDEX2(3,3,4)]+=d_0*w1 + d_1*w0;
849 } else { // constant data
850 EM_S[INDEX2(1,1,4)]+=4*d_p[0]*w2;
851 EM_S[INDEX2(3,1,4)]+=2*d_p[0]*w2;
852 EM_S[INDEX2(1,3,4)]+=2*d_p[0]*w2;
853 EM_S[INDEX2(3,3,4)]+=4*d_p[0]*w2;
854 }
855 }
856 ///////////////
857 // process y //
858 ///////////////
859 if (addEM_F) {
860 const double* y_p=y.getSampleDataRO(e);
861 if (y.actsExpanded()) {
862 EM_F[1]+=w3*y_p[0] + w4*y_p[1];
863 EM_F[3]+=w3*y_p[1] + w4*y_p[0];
864 } else { // constant data
865 EM_F[1]+=6*w2*y_p[0];
866 EM_F[3]+=6*w2*y_p[0];
867 }
868 }
869 const index_t firstNode=m_NN[0]*(k1+1)-2;
870 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
871 }
872 } // end colouring
873 }
874
875 if (domain->m_faceOffset[2] > -1) {
876 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
877 #pragma omp for
878 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
879 vector<double> EM_S(4*4, 0);
880 vector<double> EM_F(4, 0);
881 const index_t e = domain->m_faceOffset[2]+k0;
882 ///////////////
883 // process d //
884 ///////////////
885 if (addEM_S) {
886 const double* d_p=d.getSampleDataRO(e);
887 if (d.actsExpanded()) {
888 const double d_0 = d_p[0];
889 const double d_1 = d_p[1];
890 const double tmp0 = w5*(d_0 + d_1);
891 EM_S[INDEX2(0,0,4)]+=d_0*w6 + d_1*w7;
892 EM_S[INDEX2(1,0,4)]+=tmp0;
893 EM_S[INDEX2(0,1,4)]+=tmp0;
894 EM_S[INDEX2(1,1,4)]+=d_0*w7 + d_1*w6;
895 } else { // constant data
896 EM_S[INDEX2(0,0,4)]+=4*d_p[0]*w5;
897 EM_S[INDEX2(1,0,4)]+=2*d_p[0]*w5;
898 EM_S[INDEX2(0,1,4)]+=2*d_p[0]*w5;
899 EM_S[INDEX2(1,1,4)]+=4*d_p[0]*w5;
900 }
901 }
902 ///////////////
903 // process y //
904 ///////////////
905 if (addEM_F) {
906 const double* y_p=y.getSampleDataRO(e);
907 if (y.actsExpanded()) {
908 EM_F[0]+=w8*y_p[0] + w9*y_p[1];
909 EM_F[1]+=w8*y_p[1] + w9*y_p[0];
910 } else { // constant data
911 EM_F[0]+=6*w5*y_p[0];
912 EM_F[1]+=6*w5*y_p[0];
913 }
914 }
915 const index_t firstNode=k0;
916 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
917 }
918 } // end colouring
919 }
920
921 if (domain->m_faceOffset[3] > -1) {
922 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
923 #pragma omp for
924 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
925 const index_t e = domain->m_faceOffset[3]+k0;
926 vector<double> EM_S(4*4, 0);
927 vector<double> EM_F(4, 0);
928 ///////////////
929 // process d //
930 ///////////////
931 if (addEM_S) {
932 const double* d_p=d.getSampleDataRO(e);
933 if (d.actsExpanded()) {
934 const double d_0 = d_p[0];
935 const double d_1 = d_p[1];
936 const double tmp0 = w5*(d_0 + d_1);
937 EM_S[INDEX2(2,2,4)]+=d_0*w6 + d_1*w7;
938 EM_S[INDEX2(3,2,4)]+=tmp0;
939 EM_S[INDEX2(2,3,4)]+=tmp0;
940 EM_S[INDEX2(3,3,4)]+=d_0*w7 + d_1*w6;
941 } else { // constant data
942 EM_S[INDEX2(2,2,4)]+=4*d_p[0]*w5;
943 EM_S[INDEX2(3,2,4)]+=2*d_p[0]*w5;
944 EM_S[INDEX2(2,3,4)]+=2*d_p[0]*w5;
945 EM_S[INDEX2(3,3,4)]+=4*d_p[0]*w5;
946 }
947 }
948 ///////////////
949 // process y //
950 ///////////////
951 if (addEM_F) {
952 const double* y_p=y.getSampleDataRO(e);
953 if (y.actsExpanded()) {
954 EM_F[2]+=w8*y_p[0] + w9*y_p[1];
955 EM_F[3]+=w8*y_p[1] + w9*y_p[0];
956 } else { // constant data
957 EM_F[2]+=6*w5*y_p[0];
958 EM_F[3]+=6*w5*y_p[0];
959 }
960 }
961 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
962 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
963 }
964 } // end colouring
965 }
966 } // end of parallel section
967 }
968
969 void DefaultAssembler2D::assemblePDEBoundarySingleReduced(Paso_SystemMatrix* mat,
970 escript::Data& rhs, const escript::Data& d, const escript::Data& y) const
971 {
972 const double w0 = m_dx[0]/4;
973 const double w1 = m_dx[1]/4;
974 const bool addEM_S=!d.isEmpty();
975 const bool addEM_F=!y.isEmpty();
976 rhs.requireWrite();
977 #pragma omp parallel
978 {
979 if (domain->m_faceOffset[0] > -1) {
980 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
981 #pragma omp for
982 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
983 vector<double> EM_S(4*4, 0);
984 vector<double> EM_F(4, 0);
985 ///////////////
986 // process d //
987 ///////////////
988 if (addEM_S) {
989 const double* d_p=d.getSampleDataRO(k1);
990 EM_S[INDEX2(0,0,4)]+=d_p[0]*w1;
991 EM_S[INDEX2(2,0,4)]+=d_p[0]*w1;
992 EM_S[INDEX2(0,2,4)]+=d_p[0]*w1;
993 EM_S[INDEX2(2,2,4)]+=d_p[0]*w1;
994 }
995 ///////////////
996 // process y //
997 ///////////////
998 if (addEM_F) {
999 const double* y_p=y.getSampleDataRO(k1);
1000 EM_F[0]+=2*w1*y_p[0];
1001 EM_F[2]+=2*w1*y_p[0];
1002 }
1003 const index_t firstNode=m_NN[0]*k1;
1004 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1005 }
1006 } // end colouring
1007 }
1008
1009 if (domain->m_faceOffset[1] > -1) {
1010 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1011 #pragma omp for
1012 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1013 vector<double> EM_S(4*4, 0);
1014 vector<double> EM_F(4, 0);
1015 const index_t e = domain->m_faceOffset[1]+k1;
1016 ///////////////
1017 // process d //
1018 ///////////////
1019 if (addEM_S) {
1020 const double* d_p=d.getSampleDataRO(e);
1021 EM_S[INDEX2(1,1,4)]+=d_p[0]*w1;
1022 EM_S[INDEX2(3,1,4)]+=d_p[0]*w1;
1023 EM_S[INDEX2(1,3,4)]+=d_p[0]*w1;
1024 EM_S[INDEX2(3,3,4)]+=d_p[0]*w1;
1025 }
1026 ///////////////
1027 // process y //
1028 ///////////////
1029 if (addEM_F) {
1030 const double* y_p=y.getSampleDataRO(e);
1031 EM_F[1]+=2*w1*y_p[0];
1032 EM_F[3]+=2*w1*y_p[0];
1033 }
1034 const index_t firstNode=m_NN[0]*(k1+1)-2;
1035 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1036 }
1037 } // end colouring
1038 }
1039
1040 if (domain->m_faceOffset[2] > -1) {
1041 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1042 #pragma omp for
1043 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1044 vector<double> EM_S(4*4, 0);
1045 vector<double> EM_F(4, 0);
1046 const index_t e = domain->m_faceOffset[2]+k0;
1047 ///////////////
1048 // process d //
1049 ///////////////
1050 if (addEM_S) {
1051 const double* d_p=d.getSampleDataRO(e);
1052 EM_S[INDEX2(0,0,4)]+=d_p[0]*w0;
1053 EM_S[INDEX2(1,0,4)]+=d_p[0]*w0;
1054 EM_S[INDEX2(0,1,4)]+=d_p[0]*w0;
1055 EM_S[INDEX2(1,1,4)]+=d_p[0]*w0;
1056 }
1057 ///////////////
1058 // process y //
1059 ///////////////
1060 if (addEM_F) {
1061 const double* y_p=y.getSampleDataRO(e);
1062 EM_F[0]+=2*w0*y_p[0];
1063 EM_F[1]+=2*w0*y_p[0];
1064 }
1065 const index_t firstNode=k0;
1066 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1067 }
1068 } // end colouring
1069 }
1070
1071 if (domain->m_faceOffset[3] > -1) {
1072 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1073 #pragma omp for
1074 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1075 vector<double> EM_S(4*4, 0);
1076 vector<double> EM_F(4, 0);
1077 const index_t e = domain->m_faceOffset[3]+k0;
1078 ///////////////
1079 // process d //
1080 ///////////////
1081 if (addEM_S) {
1082 const double* d_p=d.getSampleDataRO(e);
1083 EM_S[INDEX2(2,2,4)]+=d_p[0]*w0;
1084 EM_S[INDEX2(3,2,4)]+=d_p[0]*w0;
1085 EM_S[INDEX2(2,3,4)]+=d_p[0]*w0;
1086 EM_S[INDEX2(3,3,4)]+=d_p[0]*w0;
1087 }
1088 ///////////////
1089 // process y //
1090 ///////////////
1091 if (addEM_F) {
1092 const double* y_p=y.getSampleDataRO(e);
1093 EM_F[2]+=2*w0*y_p[0];
1094 EM_F[3]+=2*w0*y_p[0];
1095 }
1096 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1097 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F, firstNode);
1098 }
1099 } // end colouring
1100 }
1101 } // end of parallel section
1102 }
1103
1104 void DefaultAssembler2D::assemblePDEBoundarySystem(Paso_SystemMatrix* mat,
1105 escript::Data& rhs, const escript::Data& d, const escript::Data& y) const
1106 {
1107 dim_t numEq, numComp;
1108 if (!mat) {
1109 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
1110 } else {
1111 numEq=mat->logical_row_block_size;
1112 numComp=mat->logical_col_block_size;
1113 }
1114 const double SQRT3 = 1.73205080756887719318;
1115 const double w5 = m_dx[0]/12;
1116 const double w6 = w5*(SQRT3 + 2);
1117 const double w7 = w5*(-SQRT3 + 2);
1118 const double w8 = w5*(SQRT3 + 3);
1119 const double w9 = w5*(-SQRT3 + 3);
1120 const double w2 = m_dx[1]/12;
1121 const double w0 = w2*(SQRT3 + 2);
1122 const double w1 = w2*(-SQRT3 + 2);
1123 const double w3 = w2*(SQRT3 + 3);
1124 const double w4 = w2*(-SQRT3 + 3);
1125 const bool addEM_S=!d.isEmpty();
1126 const bool addEM_F=!y.isEmpty();
1127 rhs.requireWrite();
1128 #pragma omp parallel
1129 {
1130 if (domain->m_faceOffset[0] > -1) {
1131 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1132 #pragma omp for
1133 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1134 vector<double> EM_S(4*4*numEq*numComp, 0);
1135 vector<double> EM_F(4*numEq, 0);
1136 const index_t e = k1;
1137 ///////////////
1138 // process d //
1139 ///////////////
1140 if (addEM_S) {
1141 const double* d_p=d.getSampleDataRO(e);
1142 if (d.actsExpanded()) {
1143 for (index_t k=0; k<numEq; k++) {
1144 for (index_t m=0; m<numComp; m++) {
1145 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1146 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1147 const double tmp0 = w2*(d_0 + d_1);
1148 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
1149 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0;
1150 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0;
1151 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
1152 }
1153 }
1154 } else { // constant data
1155 for (index_t k=0; k<numEq; k++) {
1156 for (index_t m=0; m<numComp; m++) {
1157 const double d_0 = d_p[INDEX2(k, m, numEq)];
1158 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w2;
1159 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=2*d_0*w2;
1160 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=2*d_0*w2;
1161 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w2;
1162 }
1163 }
1164 }
1165 }
1166 ///////////////
1167 // process y //
1168 ///////////////
1169 if (addEM_F) {
1170 const double* y_p=y.getSampleDataRO(e);
1171 if (y.actsExpanded()) {
1172 for (index_t k=0; k<numEq; k++) {
1173 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1174 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1175 EM_F[INDEX2(k,0,numEq)]+=w3*y_0 + w4*y_1;
1176 EM_F[INDEX2(k,2,numEq)]+=w3*y_1 + w4*y_0;
1177 }
1178 } else { // constant data
1179 for (index_t k=0; k<numEq; k++) {
1180 EM_F[INDEX2(k,0,numEq)]+=6*w2*y_p[k];
1181 EM_F[INDEX2(k,2,numEq)]+=6*w2*y_p[k];
1182 }
1183 }
1184 }
1185 const index_t firstNode=m_NN[0]*k1;
1186 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1187 firstNode, numEq, numComp);
1188 }
1189 } // end colouring
1190 }
1191
1192 if (domain->m_faceOffset[1] > -1) {
1193 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1194 #pragma omp for
1195 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1196 vector<double> EM_S(4*4*numEq*numComp, 0);
1197 vector<double> EM_F(4*numEq, 0);
1198 const index_t e = domain->m_faceOffset[1]+k1;
1199 ///////////////
1200 // process d //
1201 ///////////////
1202 if (addEM_S) {
1203 const double* d_p=d.getSampleDataRO(e);
1204 if (d.actsExpanded()) {
1205 for (index_t k=0; k<numEq; k++) {
1206 for (index_t m=0; m<numComp; m++) {
1207 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1208 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1209 const double tmp0 = w2*(d_0 + d_1);
1210 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
1211 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0;
1212 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0;
1213 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
1214 }
1215 }
1216 } else { // constant data
1217 for (index_t k=0; k<numEq; k++) {
1218 for (index_t m=0; m<numComp; m++) {
1219 const double d_0 = d_p[INDEX2(k, m, numEq)];
1220 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w2;
1221 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=2*d_0*w2;
1222 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=2*d_0*w2;
1223 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w2;
1224 }
1225 }
1226 }
1227 }
1228 ///////////////
1229 // process y //
1230 ///////////////
1231 if (addEM_F) {
1232 const double* y_p=y.getSampleDataRO(e);
1233 if (y.actsExpanded()) {
1234 for (index_t k=0; k<numEq; k++) {
1235 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1236 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1237 EM_F[INDEX2(k,1,numEq)]+=w3*y_0 + w4*y_1;
1238 EM_F[INDEX2(k,3,numEq)]+=w3*y_1 + w4*y_0;
1239 }
1240 } else { // constant data
1241 for (index_t k=0; k<numEq; k++) {
1242 EM_F[INDEX2(k,1,numEq)]+=6*w2*y_p[k];
1243 EM_F[INDEX2(k,3,numEq)]+=6*w2*y_p[k];
1244 }
1245 }
1246 }
1247 const index_t firstNode=m_NN[0]*(k1+1)-2;
1248 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1249 firstNode, numEq, numComp);
1250 }
1251 } // end colouring
1252 }
1253
1254 if (domain->m_faceOffset[2] > -1) {
1255 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1256 #pragma omp for
1257 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1258 vector<double> EM_S(4*4*numEq*numComp, 0);
1259 vector<double> EM_F(4*numEq, 0);
1260 const index_t e = domain->m_faceOffset[2]+k0;
1261 ///////////////
1262 // process d //
1263 ///////////////
1264 if (addEM_S) {
1265 const double* d_p=d.getSampleDataRO(e);
1266 if (d.actsExpanded()) {
1267 for (index_t k=0; k<numEq; k++) {
1268 for (index_t m=0; m<numComp; m++) {
1269 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1270 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1271 const double tmp0 = w5*(d_0 + d_1);
1272 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
1273 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0;
1274 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0;
1275 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
1276 }
1277 }
1278 } else { // constant data
1279 for (index_t k=0; k<numEq; k++) {
1280 for (index_t m=0; m<numComp; m++) {
1281 const double d_0 = d_p[INDEX2(k, m, numEq)];
1282 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w5;
1283 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=2*d_0*w5;
1284 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=2*d_0*w5;
1285 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w5;
1286 }
1287 }
1288 }
1289 }
1290 ///////////////
1291 // process y //
1292 ///////////////
1293 if (addEM_F) {
1294 const double* y_p=y.getSampleDataRO(e);
1295 if (y.actsExpanded()) {
1296 for (index_t k=0; k<numEq; k++) {
1297 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1298 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1299 EM_F[INDEX2(k,0,numEq)]+=w8*y_0 + w9*y_1;
1300 EM_F[INDEX2(k,1,numEq)]+=w8*y_1 + w9*y_0;
1301 }
1302 } else { // constant data
1303 for (index_t k=0; k<numEq; k++) {
1304 EM_F[INDEX2(k,0,numEq)]+=6*w5*y_p[k];
1305 EM_F[INDEX2(k,1,numEq)]+=6*w5*y_p[k];
1306 }
1307 }
1308 }
1309 const index_t firstNode=k0;
1310 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1311 firstNode, numEq, numComp);
1312 }
1313 } // end colouring
1314 }
1315
1316 if (domain->m_faceOffset[3] > -1) {
1317 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1318 #pragma omp for
1319 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1320 vector<double> EM_S(4*4*numEq*numComp, 0);
1321 vector<double> EM_F(4*numEq, 0);
1322 const index_t e = domain->m_faceOffset[3]+k0;
1323 ///////////////
1324 // process d //
1325 ///////////////
1326 if (addEM_S) {
1327 const double* d_p=d.getSampleDataRO(e);
1328 if (d.actsExpanded()) {
1329 for (index_t k=0; k<numEq; k++) {
1330 for (index_t m=0; m<numComp; m++) {
1331 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
1332 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
1333 const double tmp0 = w5*(d_0 + d_1);
1334 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
1335 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0;
1336 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0;
1337 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
1338 }
1339 }
1340 } else { // constant data
1341 for (index_t k=0; k<numEq; k++) {
1342 for (index_t m=0; m<numComp; m++) {
1343 const double d_0 = d_p[INDEX2(k, m, numEq)];
1344 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w5;
1345 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=2*d_0*w5;
1346 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=2*d_0*w5;
1347 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w5;
1348 }
1349 }
1350 }
1351 }
1352 ///////////////
1353 // process y //
1354 ///////////////
1355 if (addEM_F) {
1356 const double* y_p=y.getSampleDataRO(e);
1357 if (y.actsExpanded()) {
1358 for (index_t k=0; k<numEq; k++) {
1359 const double y_0 = y_p[INDEX2(k, 0, numEq)];
1360 const double y_1 = y_p[INDEX2(k, 1, numEq)];
1361 EM_F[INDEX2(k,2,numEq)]+=w8*y_0 + w9*y_1;
1362 EM_F[INDEX2(k,3,numEq)]+=w8*y_1 + w9*y_0;
1363 }
1364 } else { // constant data
1365 for (index_t k=0; k<numEq; k++) {
1366 EM_F[INDEX2(k,2,numEq)]+=6*w5*y_p[k];
1367 EM_F[INDEX2(k,3,numEq)]+=6*w5*y_p[k];
1368 }
1369 }
1370 }
1371 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1372 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1373 firstNode, numEq, numComp);
1374 }
1375 } // end colouring
1376 }
1377 } // end of parallel section
1378 }
1379
1380 //protected
1381 void DefaultAssembler2D::assemblePDEBoundarySystemReduced(Paso_SystemMatrix* mat,
1382 escript::Data& rhs, const escript::Data& d, const escript::Data& y) const
1383 {
1384 dim_t numEq, numComp;
1385 if (!mat)
1386 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
1387 else {
1388 numEq=mat->logical_row_block_size;
1389 numComp=mat->logical_col_block_size;
1390 }
1391 const double w0 = m_dx[0]/4;
1392 const double w1 = m_dx[1]/4;
1393 const bool addEM_S=!d.isEmpty();
1394 const bool addEM_F=!y.isEmpty();
1395 rhs.requireWrite();
1396 #pragma omp parallel
1397 {
1398 if (domain->m_faceOffset[0] > -1) {
1399 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1400 #pragma omp for
1401 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1402 vector<double> EM_S(4*4*numEq*numComp, 0);
1403 vector<double> EM_F(4*numEq, 0);
1404 ///////////////
1405 // process d //
1406 ///////////////
1407 if (addEM_S) {
1408 const double* d_p=d.getSampleDataRO(k1);
1409 for (index_t k=0; k<numEq; k++) {
1410 for (index_t m=0; m<numComp; m++) {
1411 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
1412 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0;
1413 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0;
1414 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0;
1415 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0;
1416 }
1417 }
1418 }
1419 ///////////////
1420 // process y //
1421 ///////////////
1422 if (addEM_F) {
1423 const double* y_p=y.getSampleDataRO(k1);
1424 for (index_t k=0; k<numEq; k++) {
1425 EM_F[INDEX2(k,0,numEq)]+=2*w1*y_p[k];
1426 EM_F[INDEX2(k,2,numEq)]+=2*w1*y_p[k];
1427 }
1428 }
1429 const index_t firstNode=m_NN[0]*k1;
1430 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1431 firstNode, numEq, numComp);
1432 }
1433 } // end colouring
1434 }
1435
1436 if (domain->m_faceOffset[1] > -1) {
1437 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1438 #pragma omp for
1439 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1440 vector<double> EM_S(4*4*numEq*numComp, 0);
1441 vector<double> EM_F(4*numEq, 0);
1442 const index_t e = domain->m_faceOffset[1]+k1;
1443 ///////////////
1444 // process d //
1445 ///////////////
1446 if (addEM_S) {
1447 const double* d_p=d.getSampleDataRO(e);
1448 for (index_t k=0; k<numEq; k++) {
1449 for (index_t m=0; m<numComp; m++) {
1450 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w1;
1451 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0;
1452 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0;
1453 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0;
1454 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0;
1455 }
1456 }
1457 }
1458 ///////////////
1459 // process y //
1460 ///////////////
1461 if (addEM_F) {
1462 const double* y_p=y.getSampleDataRO(e);
1463 for (index_t k=0; k<numEq; k++) {
1464 EM_F[INDEX2(k,1,numEq)]+=2*w1*y_p[k];
1465 EM_F[INDEX2(k,3,numEq)]+=2*w1*y_p[k];
1466 }
1467 }
1468 const index_t firstNode=m_NN[0]*(k1+1)-2;
1469 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1470 firstNode, numEq, numComp);
1471 }
1472 } // end colouring
1473 }
1474
1475 if (domain->m_faceOffset[2] > -1) {
1476 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1477 #pragma omp for
1478 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1479 vector<double> EM_S(4*4*numEq*numComp, 0);
1480 vector<double> EM_F(4*numEq, 0);
1481 const index_t e = domain->m_faceOffset[2]+k0;
1482 ///////////////
1483 // process d //
1484 ///////////////
1485 if (addEM_S) {
1486 const double* d_p=d.getSampleDataRO(e);
1487 for (index_t k=0; k<numEq; k++) {
1488 for (index_t m=0; m<numComp; m++) {
1489 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
1490 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0;
1491 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0;
1492 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0;
1493 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0;
1494 }
1495 }
1496 }
1497 ///////////////
1498 // process y //
1499 ///////////////
1500 if (addEM_F) {
1501 const double* y_p=y.getSampleDataRO(e);
1502 for (index_t k=0; k<numEq; k++) {
1503 EM_F[INDEX2(k,0,numEq)]+=2*w0*y_p[k];
1504 EM_F[INDEX2(k,1,numEq)]+=2*w0*y_p[k];
1505 }
1506 }
1507 const index_t firstNode=k0;
1508 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1509 firstNode, numEq, numComp);
1510 }
1511 } // end colouring
1512 }
1513
1514 if (domain->m_faceOffset[3] > -1) {
1515 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
1516 #pragma omp for
1517 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
1518 vector<double> EM_S(4*4*numEq*numComp, 0);
1519 vector<double> EM_F(4*numEq, 0);
1520 const index_t e = domain->m_faceOffset[3]+k0;
1521 ///////////////
1522 // process d //
1523 ///////////////
1524 if (addEM_S) {
1525 const double* d_p=d.getSampleDataRO(e);
1526 for (index_t k=0; k<numEq; k++) {
1527 for (index_t m=0; m<numComp; m++) {
1528 const double tmp0 = d_p[INDEX2(k, m, numEq)]*w0;
1529 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0;
1530 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0;
1531 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0;
1532 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0;
1533 }
1534 }
1535 }
1536 ///////////////
1537 // process y //
1538 ///////////////
1539 if (addEM_F) {
1540 const double* y_p=y.getSampleDataRO(e);
1541 for (index_t k=0; k<numEq; k++) {
1542 EM_F[INDEX2(k,2,numEq)]+=2*w0*y_p[k];
1543 EM_F[INDEX2(k,3,numEq)]+=2*w0*y_p[k];
1544 }
1545 }
1546 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
1547 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
1548 firstNode, numEq, numComp);
1549 }
1550 } // end colouring
1551 }
1552 } // end of parallel section
1553 }
1554
1555 //protected
1556 void DefaultAssembler2D::assemblePDESingle(Paso_SystemMatrix* mat,
1557 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
1558 const escript::Data& C, const escript::Data& D,
1559 const escript::Data& X, const escript::Data& Y) const
1560 {
1561 const double SQRT3 = 1.73205080756887719318;
1562 const double w1 = 1.0/24.0;
1563 const double w5 = -SQRT3/24 + 1.0/12;
1564 const double w2 = -SQRT3/24 - 1.0/12;
1565 const double w19 = -m_dx[0]/12;
1566 const double w11 = w19*(SQRT3 + 3)/12;
1567 const double w14 = w19*(-SQRT3 + 3)/12;
1568 const double w16 = w19*(5*SQRT3 + 9)/12;
1569 const double w17 = w19*(-5*SQRT3 + 9)/12;
1570 const double w27 = w19*(-SQRT3 - 3)/2;
1571 const double w28 = w19*(SQRT3 - 3)/2;
1572 const double w18 = -m_dx[1]/12;
1573 const double w12 = w18*(5*SQRT3 + 9)/12;
1574 const double w13 = w18*(-5*SQRT3 + 9)/12;
1575 const double w10 = w18*(SQRT3 + 3)/12;
1576 const double w15 = w18*(-SQRT3 + 3)/12;
1577 const double w25 = w18*(-SQRT3 - 3)/2;
1578 const double w26 = w18*(SQRT3 - 3)/2;
1579 const double w22 = m_dx[0]*m_dx[1]/144;
1580 const double w20 = w22*(SQRT3 + 2);
1581 const double w21 = w22*(-SQRT3 + 2);
1582 const double w23 = w22*(4*SQRT3 + 7);
1583 const double w24 = w22*(-4*SQRT3 + 7);
1584 const double w3 = m_dx[0]/(24*m_dx[1]);
1585 const double w7 = w3*(SQRT3 + 2);
1586 const double w8 = w3*(-SQRT3 + 2);
1587 const double w6 = -m_dx[1]/(24*m_dx[0]);
1588 const double w0 = w6*(SQRT3 + 2);
1589 const double w4 = w6*(-SQRT3 + 2);
1590
1591 rhs.requireWrite();
1592 #pragma omp parallel
1593 {
1594 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1595 #pragma omp for
1596 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
1597 for (index_t k0=0; k0<m_NE[0]; ++k0) {
1598 bool addEM_S=false;
1599 bool addEM_F=false;
1600 vector<double> EM_S(4*4, 0);
1601 vector<double> EM_F(4, 0);
1602 const index_t e = k0 + m_NE[0]*k1;
1603 ///////////////
1604 // process A //
1605 ///////////////
1606 if (!A.isEmpty()) {
1607 addEM_S = true;
1608 const double* A_p = A.getSampleDataRO(e);
1609 if (A.actsExpanded()) {
1610 const double A_00_0 = A_p[INDEX3(0,0,0,2,2)];
1611 const double A_01_0 = A_p[INDEX3(0,1,0,2,2)];
1612 const double A_10_0 = A_p[INDEX3(1,0,0,2,2)];
1613 const double A_11_0 = A_p[INDEX3(1,1,0,2,2)];
1614 const double A_00_1 = A_p[INDEX3(0,0,1,2,2)];
1615 const double A_01_1 = A_p[INDEX3(0,1,1,2,2)];
1616 const double A_10_1 = A_p[INDEX3(1,0,1,2,2)];
1617 const double A_11_1 = A_p[INDEX3(1,1,1,2,2)];
1618 const double A_00_2 = A_p[INDEX3(0,0,2,2,2)];
1619 const double A_01_2 = A_p[INDEX3(0,1,2,2,2)];
1620 const double A_10_2 = A_p[INDEX3(1,0,2,2,2)];
1621 const double A_11_2 = A_p[INDEX3(1,1,2,2,2)];
1622 const double A_00_3 = A_p[INDEX3(0,0,3,2,2)];
1623 const double A_01_3 = A_p[INDEX3(0,1,3,2,2)];
1624 const double A_10_3 = A_p[INDEX3(1,0,3,2,2)];
1625 const double A_11_3 = A_p[INDEX3(1,1,3,2,2)];
1626 const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
1627 const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
1628 const double tmp2 = w4*(A_00_2 + A_00_3);
1629 const double tmp3 = w0*(A_00_0 + A_00_1);
1630 const double tmp4 = w5*(A_01_2 - A_10_3);
1631 const double tmp5 = w2*(-A_01_1 + A_10_0);
1632 const double tmp6 = w5*(A_01_3 + A_10_0);
1633 const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
1634 const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
1635 const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
1636 const double tmp10 = w2*(-A_01_0 - A_10_3);
1637 const double tmp11 = w4*(A_00_0 + A_00_1);
1638 const double tmp12 = w0*(A_00_2 + A_00_3);
1639 const double tmp13 = w5*(A_01_1 - A_10_0);
1640 const double tmp14 = w2*(-A_01_2 + A_10_3);
1641 const double tmp15 = w7*(A_11_0 + A_11_2);
1642 const double tmp16 = w4*(-A_00_2 - A_00_3);
1643 const double tmp17 = w0*(-A_00_0 - A_00_1);
1644 const double tmp18 = w5*(A_01_3 + A_10_3);
1645 const double tmp19 = w8*(A_11_1 + A_11_3);
1646 const double tmp20 = w2*(-A_01_0 - A_10_0);
1647 const double tmp21 = w7*(A_11_1 + A_11_3);
1648 const double tmp22 = w4*(-A_00_0 - A_00_1);
1649 const double tmp23 = w0*(-A_00_2 - A_00_3);
1650 const double tmp24 = w5*(A_01_0 + A_10_0);
1651 const double tmp25 = w8*(A_11_0 + A_11_2);
1652 const double tmp26 = w2*(-A_01_3 - A_10_3);
1653 const double tmp27 = w5*(-A_01_1 - A_10_2);
1654 const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
1655 const double tmp29 = w2*(A_01_2 + A_10_1);
1656 const double tmp30 = w7*(-A_11_1 - A_11_3);
1657 const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
1658 const double tmp32 = w5*(-A_01_0 + A_10_2);
1659 const double tmp33 = w8*(-A_11_0 - A_11_2);
1660 const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
1661 const double tmp35 = w2*(A_01_3 - A_10_1);
1662 const double tmp36 = w5*(A_01_0 + A_10_3);
1663 const double tmp37 = w2*(-A_01_3 - A_10_0);
1664 const double tmp38 = w7*(-A_11_0 - A_11_2);
1665 const double tmp39 = w5*(-A_01_3 + A_10_1);
1666 const double tmp40 = w8*(-A_11_1 - A_11_3);
1667 const double tmp41 = w2*(A_01_0 - A_10_2);
1668 const double tmp42 = w5*(A_01_1 - A_10_3);
1669 const double tmp43 = w2*(-A_01_2 + A_10_0);
1670 const double tmp44 = w5*(A_01_2 - A_10_0);
1671 const double tmp45 = w2*(-A_01_1 + A_10_3);
1672 const double tmp46 = w5*(-A_01_0 + A_10_1);
1673 const double tmp47 = w2*(A_01_3 - A_10_2);
1674 const double tmp48 = w5*(-A_01_1 - A_10_1);
1675 const double tmp49 = w2*(A_01_2 + A_10_2);
1676 const double tmp50 = w5*(-A_01_3 + A_10_2);
1677 const double tmp51 = w2*(A_01_0 - A_10_1);
1678 const double tmp52 = w5*(-A_01_2 - A_10_1);
1679 const double tmp53 = w2*(A_01_1 + A_10_2);
1680 const double tmp54 = w5*(-A_01_2 - A_10_2);
1681 const double tmp55 = w2*(A_01_1 + A_10_1);
1682 EM_S[INDEX2(0,0,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
1683 EM_S[INDEX2(0,1,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
1684 EM_S[INDEX2(0,2,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
1685 EM_S[INDEX2(0,3,4)]+=tmp28 + tmp52 + tmp53 + tmp7 + tmp8;
1686 EM_S[INDEX2(1,0,4)]+=tmp0 + tmp2 + tmp3 + tmp31 + tmp50 + tmp51;
1687 EM_S[INDEX2(1,1,4)]+=tmp16 + tmp17 + tmp21 + tmp25 + tmp28 + tmp54 + tmp55;
1688 EM_S[INDEX2(1,2,4)]+=tmp10 + tmp6 + tmp7 + tmp8 + tmp9;
1689 EM_S[INDEX2(1,3,4)]+=tmp1 + tmp30 + tmp33 + tmp34 + tmp44 + tmp45;
1690 EM_S[INDEX2(2,0,4)]+=tmp1 + tmp34 + tmp38 + tmp40 + tmp42 + tmp43;
1691 EM_S[INDEX2(2,1,4)]+=tmp36 + tmp37 + tmp7 + tmp8 + tmp9;
1692 EM_S[INDEX2(2,2,4)]+=tmp15 + tmp19 + tmp22 + tmp23 + tmp28 + tmp48 + tmp49;
1693 EM_S[INDEX2(2,3,4)]+=tmp0 + tmp11 + tmp12 + tmp31 + tmp46 + tmp47;
1694 EM_S[INDEX2(3,0,4)]+=tmp27 + tmp28 + tmp29 + tmp7 + tmp8;
1695 EM_S[INDEX2(3,1,4)]+=tmp30 + tmp31 + tmp32 + tmp33 + tmp34 + tmp35;
1696 EM_S[INDEX2(3,2,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
1697 EM_S[INDEX2(3,3,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
1698 } else { // constant data
1699 const double A_00 = A_p[INDEX2(0,0,2)];
1700 const double A_01 = A_p[INDEX2(0,1,2)];
1701 const double A_10 = A_p[INDEX2(1,0,2)];
1702 const double A_11 = A_p[INDEX2(1,1,2)];
1703 const double tmp0 = 6*w1*(A_01 - A_10);
1704 const double tmp1 = 6*w1*(A_01 + A_10);
1705 const double tmp2 = 6*w1*(-A_01 - A_10);
1706 const double tmp3 = 6*w1*(-A_01 + A_10);
1707 EM_S[INDEX2(0,0,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
1708 EM_S[INDEX2(0,1,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
1709 EM_S[INDEX2(0,2,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
1710 EM_S[INDEX2(0,3,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
1711 EM_S[INDEX2(1,0,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
1712 EM_S[INDEX2(1,1,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
1713 EM_S[INDEX2(1,2,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
1714 EM_S[INDEX2(1,3,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
1715 EM_S[INDEX2(2,0,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
1716 EM_S[INDEX2(2,1,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
1717 EM_S[INDEX2(2,2,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
1718 EM_S[INDEX2(2,3,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
1719 EM_S[INDEX2(3,0,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
1720 EM_S[INDEX2(3,1,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
1721 EM_S[INDEX2(3,2,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
1722 EM_S[INDEX2(3,3,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
1723 }
1724 }
1725 ///////////////
1726 // process B //
1727 ///////////////
1728 if (!B.isEmpty()) {
1729 addEM_S=true;
1730 const double* B_p=B.getSampleDataRO(e);
1731 if (B.actsExpanded()) {
1732 const double B_0_0 = B_p[INDEX2(0,0,2)];
1733 const double B_1_0 = B_p[INDEX2(1,0,2)];
1734 const double B_0_1 = B_p[INDEX2(0,1,2)];
1735 const double B_1_1 = B_p[INDEX2(1,1,2)];
1736 const double B_0_2 = B_p[INDEX2(0,2,2)];
1737 const double B_1_2 = B_p[INDEX2(1,2,2)];
1738 const double B_0_3 = B_p[INDEX2(0,3,2)];
1739 const double B_1_3 = B_p[INDEX2(1,3,2)];
1740 const double tmp0 = w11*(B_1_0 + B_1_1);
1741 const double tmp1 = w14*(B_1_2 + B_1_3);
1742 const double tmp2 = w15*(-B_0_1 - B_0_3);
1743 const double tmp3 = w10*(-B_0_0 - B_0_2);
1744 const double tmp4 = w11*(B_1_2 + B_1_3);
1745 const double tmp5 = w14*(B_1_0 + B_1_1);
1746 const double tmp6 = w11*(-B_1_2 - B_1_3);
1747 const double tmp7 = w14*(-B_1_0 - B_1_1);
1748 const double tmp8 = w11*(-B_1_0 - B_1_1);
1749 const double tmp9 = w14*(-B_1_2 - B_1_3);
1750 const double tmp10 = w10*(-B_0_1 - B_0_3);
1751 const double tmp11 = w15*(-B_0_0 - B_0_2);
1752 const double tmp12 = w15*(B_0_0 + B_0_2);
1753 const double tmp13 = w10*(B_0_1 + B_0_3);
1754 const double tmp14 = w10*(B_0_0 + B_0_2);
1755 const double tmp15 = w15*(B_0_1 + B_0_3);
1756 EM_S[INDEX2(0,0,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
1757 EM_S[INDEX2(0,1,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
1758 EM_S[INDEX2(0,2,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
1759 EM_S[INDEX2(0,3,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
1760 EM_S[INDEX2(1,0,4)]+=-B_0_0*w12 - B_0_1*w10 - B_0_2*w15 - B_0_3*w13 + tmp0 + tmp1;
1761 EM_S[INDEX2(1,1,4)]+=-B_0_0*w10 - B_0_1*w12 - B_0_2*w13 - B_0_3*w15 + B_1_0*w14 + B_1_1*w16 + B_1_2*w17 + B_1_3*w11;
1762 EM_S[INDEX2(1,2,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
1763 EM_S[INDEX2(1,3,4)]+=