/[escript]/trunk/ripley/src/WaveAssembler2D.cpp
ViewVC logotype

Contents of /trunk/ripley/src/WaveAssembler2D.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4645 - (show annotations)
Mon Feb 3 00:06:24 2014 UTC (5 years, 2 months ago) by sshaw
File size: 41690 byte(s)
further optimisation of wave assemblers, added support for constant data
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15 #include <ripley/WaveAssembler2D.h>
16
17 using namespace std;
18
19 namespace ripley {
20
21 WaveAssembler2D::WaveAssembler2D(Rectangle *dom, double *m_dx, dim_t *m_NX, dim_t *m_NE,
22 dim_t *m_NN, std::map<std::string, escript::Data> c)
23 : AbstractAssembler() {
24 domain = dom;
25 this->m_dx = m_dx;
26 this->m_NX = m_NX;
27 this->m_NE = m_NE;
28 this->m_NN = m_NN;
29 if (c.find("c11") == c.end() || c.find("c12") == c.end()
30 || c.find("c13") == c.end() || c.find("c33") == c.end()
31 || c.find("c44") == c.end() || c.find("c66") == c.end())
32 throw RipleyException("required constants missing for WaveAssembler");
33 c11 = c.find("c11")->second, c12 = c.find("c12")->second,
34 c13 = c.find("c13")->second, c33 = c.find("c33")->second,
35 c44 = c.find("c44")->second, c66 = c.find("c66")->second;
36 }
37
38 void WaveAssembler2D::assemblePDESystem(Paso_SystemMatrix* mat,
39 escript::Data& rhs, map<string, escript::Data> coefs) const
40 {
41 const escript::Data A = unpackData("A", coefs), B = unpackData("B", coefs),
42 C = unpackData("C", coefs), D = unpackData("D", coefs),
43 Y = unpackData("Y", coefs), du = unpackData("du", coefs);
44 dim_t numEq, numComp;
45 if (!mat)
46 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
47 else {
48 numEq=mat->logical_row_block_size;
49 numComp=mat->logical_col_block_size;
50 }
51 const double SQRT3 = 1.73205080756887719318;
52 const double w1 = 1.0/24;
53 const double w5 = -SQRT3/24 + 1.0/12;
54 const double w2 = -SQRT3/24 - 1.0/12;
55 const double w19 = -m_dx[0]/12;
56 const double w11 = w19*(SQRT3 + 3)/12;
57 const double w14 = w19*(-SQRT3 + 3)/12;
58 const double w16 = w19*(5*SQRT3 + 9)/12;
59 const double w17 = w19*(-5*SQRT3 + 9)/12;
60 const double w27 = w19*(-SQRT3 - 3)/2;
61 const double w28 = w19*(SQRT3 - 3)/2;
62 const double w18 = -m_dx[1]/12;
63 const double w10 = w18*(SQRT3 + 3)/12;
64 const double w15 = w18*(-SQRT3 + 3)/12;
65 const double w12 = w18*(5*SQRT3 + 9)/12;
66 const double w13 = w18*(-5*SQRT3 + 9)/12;
67 const double w25 = w18*(-SQRT3 - 3)/2;
68 const double w26 = w18*(SQRT3 - 3)/2;
69 const double w22 = m_dx[0]*m_dx[1]/144;
70 const double w20 = w22*(SQRT3 + 2);
71 const double w21 = w22*(-SQRT3 + 2);
72 const double w23 = w22*(4*SQRT3 + 7);
73 const double w24 = w22*(-4*SQRT3 + 7);
74 const double w3 = m_dx[0]/(24*m_dx[1]);
75 const double w7 = w3*(SQRT3 + 2);
76 const double w8 = w3*(-SQRT3 + 2);
77 const double w6 = -m_dx[1]/(24*m_dx[0]);
78 const double w0 = w6*(SQRT3 + 2);
79 const double w4 = w6*(-SQRT3 + 2);
80
81 rhs.requireWrite();
82 #pragma omp parallel
83 {
84 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
85 #pragma omp for
86 for (index_t k1=k1_0; k1 < m_NE[1]; k1+=2) {
87 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
88 bool addEM_S=false;
89 bool addEM_F=false;
90 vector<double> EM_S(4*4*numEq*numComp, 0);
91 vector<double> EM_F(4*numEq, 0);
92 const index_t e = k0 + m_NE[0]*k1;
93 ///////////////
94 // process A //
95 ///////////////
96 if (!A.isEmpty()) {
97 addEM_S = true;
98 const double* A_p = A.getSampleDataRO(e);
99 if (A.actsExpanded()) {
100 for (index_t k=0; k<numEq; k++) {
101 for (index_t m=0; m<numComp; m++) {
102 const double A_00_0 = A_p[INDEX5(k,0,m,0,0,numEq,2,numComp,2)];
103 const double A_01_0 = A_p[INDEX5(k,0,m,1,0,numEq,2,numComp,2)];
104 const double A_10_0 = A_p[INDEX5(k,1,m,0,0,numEq,2,numComp,2)];
105 const double A_11_0 = A_p[INDEX5(k,1,m,1,0,numEq,2,numComp,2)];
106 const double A_00_1 = A_p[INDEX5(k,0,m,0,1,numEq,2,numComp,2)];
107 const double A_01_1 = A_p[INDEX5(k,0,m,1,1,numEq,2,numComp,2)];
108 const double A_10_1 = A_p[INDEX5(k,1,m,0,1,numEq,2,numComp,2)];
109 const double A_11_1 = A_p[INDEX5(k,1,m,1,1,numEq,2,numComp,2)];
110 const double A_00_2 = A_p[INDEX5(k,0,m,0,2,numEq,2,numComp,2)];
111 const double A_01_2 = A_p[INDEX5(k,0,m,1,2,numEq,2,numComp,2)];
112 const double A_10_2 = A_p[INDEX5(k,1,m,0,2,numEq,2,numComp,2)];
113 const double A_11_2 = A_p[INDEX5(k,1,m,1,2,numEq,2,numComp,2)];
114 const double A_00_3 = A_p[INDEX5(k,0,m,0,3,numEq,2,numComp,2)];
115 const double A_01_3 = A_p[INDEX5(k,0,m,1,3,numEq,2,numComp,2)];
116 const double A_10_3 = A_p[INDEX5(k,1,m,0,3,numEq,2,numComp,2)];
117 const double A_11_3 = A_p[INDEX5(k,1,m,1,3,numEq,2,numComp,2)];
118 const double tmp0 = w3*(A_11_0 + A_11_1 + A_11_2 + A_11_3);
119 const double tmp1 = w1*(A_01_0 + A_01_3 - A_10_1 - A_10_2);
120 const double tmp2 = w4*(A_00_2 + A_00_3);
121 const double tmp3 = w0*(A_00_0 + A_00_1);
122 const double tmp4 = w5*(A_01_2 - A_10_3);
123 const double tmp5 = w2*(-A_01_1 + A_10_0);
124 const double tmp6 = w5*(A_01_3 + A_10_0);
125 const double tmp7 = w3*(-A_11_0 - A_11_1 - A_11_2 - A_11_3);
126 const double tmp8 = w6*(A_00_0 + A_00_1 + A_00_2 + A_00_3);
127 const double tmp9 = w1*(A_01_1 + A_01_2 + A_10_1 + A_10_2);
128 const double tmp10 = w2*(-A_01_0 - A_10_3);
129 const double tmp11 = w4*(A_00_0 + A_00_1);
130 const double tmp12 = w0*(A_00_2 + A_00_3);
131 const double tmp13 = w5*(A_01_1 - A_10_0);
132 const double tmp14 = w2*(-A_01_2 + A_10_3);
133 const double tmp15 = w7*(A_11_0 + A_11_2);
134 const double tmp16 = w4*(-A_00_2 - A_00_3);
135 const double tmp17 = w0*(-A_00_0 - A_00_1);
136 const double tmp18 = w5*(A_01_3 + A_10_3);
137 const double tmp19 = w8*(A_11_1 + A_11_3);
138 const double tmp20 = w2*(-A_01_0 - A_10_0);
139 const double tmp21 = w7*(A_11_1 + A_11_3);
140 const double tmp22 = w4*(-A_00_0 - A_00_1);
141 const double tmp23 = w0*(-A_00_2 - A_00_3);
142 const double tmp24 = w5*(A_01_0 + A_10_0);
143 const double tmp25 = w8*(A_11_0 + A_11_2);
144 const double tmp26 = w2*(-A_01_3 - A_10_3);
145 const double tmp27 = w5*(-A_01_1 - A_10_2);
146 const double tmp28 = w1*(-A_01_0 - A_01_3 - A_10_0 - A_10_3);
147 const double tmp29 = w2*(A_01_2 + A_10_1);
148 const double tmp30 = w7*(-A_11_1 - A_11_3);
149 const double tmp31 = w1*(-A_01_1 - A_01_2 + A_10_0 + A_10_3);
150 const double tmp32 = w5*(-A_01_0 + A_10_2);
151 const double tmp33 = w8*(-A_11_0 - A_11_2);
152 const double tmp34 = w6*(-A_00_0 - A_00_1 - A_00_2 - A_00_3);
153 const double tmp35 = w2*(A_01_3 - A_10_1);
154 const double tmp36 = w5*(A_01_0 + A_10_3);
155 const double tmp37 = w2*(-A_01_3 - A_10_0);
156 const double tmp38 = w7*(-A_11_0 - A_11_2);
157 const double tmp39 = w5*(-A_01_3 + A_10_1);
158 const double tmp40 = w8*(-A_11_1 - A_11_3);
159 const double tmp41 = w2*(A_01_0 - A_10_2);
160 const double tmp42 = w5*(A_01_1 - A_10_3);
161 const double tmp43 = w2*(-A_01_2 + A_10_0);
162 const double tmp44 = w5*(A_01_2 - A_10_0);
163 const double tmp45 = w2*(-A_01_1 + A_10_3);
164 const double tmp46 = w5*(-A_01_0 + A_10_1);
165 const double tmp47 = w2*(A_01_3 - A_10_2);
166 const double tmp48 = w5*(-A_01_1 - A_10_1);
167 const double tmp49 = w2*(A_01_2 + A_10_2);
168 const double tmp50 = w5*(-A_01_3 + A_10_2);
169 const double tmp51 = w2*(A_01_0 - A_10_1);
170 const double tmp52 = w5*(-A_01_2 - A_10_1);
171 const double tmp53 = w2*(A_01_1 + A_10_2);
172 const double tmp54 = w5*(-A_01_2 - A_10_2);
173 const double tmp55 = w2*(A_01_1 + A_10_1);
174 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp15 + tmp16 + tmp17 + tmp18 + tmp19 + tmp20 + tmp9;
175 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5;
176 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp31 + tmp34 + tmp38 + tmp39 + tmp40 + tmp41;
177 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp28 + tmp52 + tmp53 + tmp7 + tmp8;
178 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp2 + tmp3 + tmp31 + tmp50 + tmp51;
179 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp16 + tmp17 + tmp21 + tmp25 + tmp28 + tmp54 + tmp55;
180 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10 + tmp6 + tmp7 + tmp8 + tmp9;
181 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp1 + tmp30 + tmp33 + tmp34 + tmp44 + tmp45;
182 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp1 + tmp34 + tmp38 + tmp40 + tmp42 + tmp43;
183 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp36 + tmp37 + tmp7 + tmp8 + tmp9;
184 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp15 + tmp19 + tmp22 + tmp23 + tmp28 + tmp48 + tmp49;
185 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0 + tmp11 + tmp12 + tmp31 + tmp46 + tmp47;
186 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp27 + tmp28 + tmp29 + tmp7 + tmp8;
187 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp30 + tmp31 + tmp32 + tmp33 + tmp34 + tmp35;
188 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0 + tmp1 + tmp11 + tmp12 + tmp13 + tmp14;
189 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp21 + tmp22 + tmp23 + tmp24 + tmp25 + tmp26 + tmp9;
190 }
191 }
192 } else { // constant data
193 for (index_t k=0; k<numEq; k++) {
194 for (index_t m=0; m<numComp; m++) {
195 const double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)];
196 const double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)];
197 const double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)];
198 const double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)];
199 const double tmp0 = 6*w1*(A_01 - A_10);
200 const double tmp1 = 6*w1*(A_01 + A_10);
201 const double tmp2 = 6*w1*(-A_01 - A_10);
202 const double tmp3 = 6*w1*(-A_01 + A_10);
203 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
204 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
205 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
206 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
207 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
208 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
209 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
210 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
211 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp0;
212 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp1;
213 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp2;
214 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp3;
215 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*A_00*w6 - 4*A_11*w3 + tmp2;
216 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-4*A_00*w6 - 8*A_11*w3 + tmp3;
217 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*A_00*w6 + 4*A_11*w3 + tmp0;
218 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-8*A_00*w6 + 8*A_11*w3 + tmp1;
219 }
220 }
221 }
222 }
223 ///////////////
224 // process B //
225 ///////////////
226 if (!B.isEmpty()) {
227 addEM_S=true;
228 const double* B_p=B.getSampleDataRO(e);
229 if (B.actsExpanded()) {
230 for (index_t k=0; k<numEq; k++) {
231 for (index_t m=0; m<numComp; m++) {
232 const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
233 const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
234 const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
235 const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
236 const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
237 const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
238 const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
239 const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
240 const double tmp0 = w11*(B_1_0 + B_1_1);
241 const double tmp1 = w14*(B_1_2 + B_1_3);
242 const double tmp2 = w15*(-B_0_1 - B_0_3);
243 const double tmp3 = w10*(-B_0_0 - B_0_2);
244 const double tmp4 = w11*(B_1_2 + B_1_3);
245 const double tmp5 = w14*(B_1_0 + B_1_1);
246 const double tmp6 = w11*(-B_1_2 - B_1_3);
247 const double tmp7 = w14*(-B_1_0 - B_1_1);
248 const double tmp8 = w11*(-B_1_0 - B_1_1);
249 const double tmp9 = w14*(-B_1_2 - B_1_3);
250 const double tmp10 = w10*(-B_0_1 - B_0_3);
251 const double tmp11 = w15*(-B_0_0 - B_0_2);
252 const double tmp12 = w15*(B_0_0 + B_0_2);
253 const double tmp13 = w10*(B_0_1 + B_0_3);
254 const double tmp14 = w10*(B_0_0 + B_0_2);
255 const double tmp15 = w15*(B_0_1 + B_0_3);
256 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
257 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
258 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
259 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
260 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-B_0_0*w12 - B_0_1*w10 - B_0_2*w15 - B_0_3*w13 + tmp0 + tmp1;
261 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-B_0_0*w10 - B_0_1*w12 - B_0_2*w13 - B_0_3*w15 + B_1_0*w14 + B_1_1*w16 + B_1_2*w17 + B_1_3*w11;
262 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
263 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=B_1_0*w17 + B_1_1*w11 + B_1_2*w14 + B_1_3*w16 + tmp10 + tmp11;
264 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-B_1_0*w16 - B_1_1*w14 - B_1_2*w11 - B_1_3*w17 + tmp14 + tmp15;
265 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
266 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=B_0_0*w15 + B_0_1*w13 + B_0_2*w12 + B_0_3*w10 - B_1_0*w11 - B_1_1*w17 - B_1_2*w16 - B_1_3*w14;
267 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=B_0_0*w13 + B_0_1*w15 + B_0_2*w10 + B_0_3*w12 + tmp6 + tmp7;
268 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
269 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-B_1_0*w14 - B_1_1*w16 - B_1_2*w17 - B_1_3*w11 + tmp10 + tmp11;
270 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-B_0_0*w15 - B_0_1*w13 - B_0_2*w12 - B_0_3*w10 + tmp6 + tmp7;
271 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-B_0_0*w13 - B_0_1*w15 - B_0_2*w10 - B_0_3*w12 - B_1_0*w17 - B_1_1*w11 - B_1_2*w14 - B_1_3*w16;
272 }
273 }
274 } else { // constant data
275 for (index_t k=0; k<numEq; k++) {
276 for (index_t m=0; m<numComp; m++) {
277 const double wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
278 const double wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
279 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wB0 + 2*wB1;
280 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2*wB0 + wB1;
281 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wB0 + 2*wB1;
282 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wB0 + wB1;
283 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2*wB0 + wB1;
284 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wB0 + 2*wB1;
285 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= -wB0 + wB1;
286 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wB0 + 2*wB1;
287 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wB0 - 2*wB1;
288 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wB0 - wB1;
289 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wB0 - 2*wB1;
290 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2*wB0 - wB1;
291 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= -wB0 - wB1;
292 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wB0 - 2*wB1;
293 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2*wB0 - wB1;
294 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wB0 - 2*wB1;
295 }
296 }
297 }
298 }
299 ///////////////
300 // process C //
301 ///////////////
302 if (!C.isEmpty()) {
303 addEM_S=true;
304 const double* C_p=C.getSampleDataRO(e);
305 if (C.actsExpanded()) {
306 for (index_t k=0; k<numEq; k++) {
307 for (index_t m=0; m<numComp; m++) {
308 const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
309 const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
310 const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
311 const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
312 const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
313 const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
314 const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
315 const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
316 const double tmp0 = w11*(C_1_0 + C_1_1);
317 const double tmp1 = w14*(C_1_2 + C_1_3);
318 const double tmp2 = w15*(C_0_0 + C_0_2);
319 const double tmp3 = w10*(C_0_1 + C_0_3);
320 const double tmp4 = w11*(-C_1_0 - C_1_1);
321 const double tmp5 = w14*(-C_1_2 - C_1_3);
322 const double tmp6 = w11*(-C_1_2 - C_1_3);
323 const double tmp7 = w14*(-C_1_0 - C_1_1);
324 const double tmp8 = w11*(C_1_2 + C_1_3);
325 const double tmp9 = w14*(C_1_0 + C_1_1);
326 const double tmp10 = w10*(-C_0_1 - C_0_3);
327 const double tmp11 = w15*(-C_0_0 - C_0_2);
328 const double tmp12 = w15*(-C_0_1 - C_0_3);
329 const double tmp13 = w10*(-C_0_0 - C_0_2);
330 const double tmp14 = w10*(C_0_0 + C_0_2);
331 const double tmp15 = w15*(C_0_1 + C_0_3);
332 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
333 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
334 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
335 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
336 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=C_0_0*w10 + C_0_1*w12 + C_0_2*w13 + C_0_3*w15 + tmp0 + tmp1;
337 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-C_0_0*w10 - C_0_1*w12 - C_0_2*w13 - C_0_3*w15 + C_1_0*w14 + C_1_1*w16 + C_1_2*w17 + C_1_3*w11;
338 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
339 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-C_1_0*w14 - C_1_1*w16 - C_1_2*w17 - C_1_3*w11 + tmp10 + tmp11;
340 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=C_1_0*w11 + C_1_1*w17 + C_1_2*w16 + C_1_3*w14 + tmp14 + tmp15;
341 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
342 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=C_0_0*w15 + C_0_1*w13 + C_0_2*w12 + C_0_3*w10 - C_1_0*w11 - C_1_1*w17 - C_1_2*w16 - C_1_3*w14;
343 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-C_0_0*w15 - C_0_1*w13 - C_0_2*w12 - C_0_3*w10 + tmp6 + tmp7;
344 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
345 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=C_1_0*w17 + C_1_1*w11 + C_1_2*w14 + C_1_3*w16 + tmp10 + tmp11;
346 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=C_0_0*w13 + C_0_1*w15 + C_0_2*w10 + C_0_3*w12 + tmp6 + tmp7;
347 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-C_0_0*w13 - C_0_1*w15 - C_0_2*w10 - C_0_3*w12 - C_1_0*w17 - C_1_1*w11 - C_1_2*w14 - C_1_3*w16;
348 }
349 }
350 } else { // constant data
351 for (index_t k=0; k<numEq; k++) {
352 for (index_t m=0; m<numComp; m++) {
353 const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
354 const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
355 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wC0 + 2*wC1;
356 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2*wC0 + wC1;
357 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wC0 - 2*wC1;
358 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= -wC0 - wC1;
359 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2*wC0 + wC1;
360 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wC0 + 2*wC1;
361 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wC0 - wC1;
362 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wC0 - 2*wC1;
363 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wC0 + 2*wC1;
364 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= -wC0 + wC1;
365 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wC0 - 2*wC1;
366 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2*wC0 - wC1;
367 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wC0 + wC1;
368 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wC0 + 2*wC1;
369 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2*wC0 - wC1;
370 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wC0 - 2*wC1;
371 }
372 }
373 }
374 }
375 ///////////////
376 // process D //
377 ///////////////
378 if (!D.isEmpty()) {
379 addEM_S=true;
380 const double* D_p=D.getSampleDataRO(e);
381 if (D.actsExpanded()) {
382 for (index_t k=0; k<numEq; k++) {
383 for (index_t m=0; m<numComp; m++) {
384 const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
385 const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
386 const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
387 const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
388 const double tmp0 = w21*(D_2 + D_3);
389 const double tmp1 = w20*(D_0 + D_1);
390 const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
391 const double tmp3 = w21*(D_0 + D_1);
392 const double tmp4 = w20*(D_2 + D_3);
393 const double tmp5 = w22*(D_1 + D_2);
394 const double tmp6 = w21*(D_0 + D_2);
395 const double tmp7 = w20*(D_1 + D_3);
396 const double tmp8 = w21*(D_1 + D_3);
397 const double tmp9 = w20*(D_0 + D_2);
398 const double tmp10 = w22*(D_0 + D_3);
399 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=D_0*w23 + D_3*w24 + tmp5;
400 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1;
401 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp8 + tmp9;
402 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2;
403 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp1;
404 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=D_1*w23 + D_2*w24 + tmp10;
405 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2;
406 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6 + tmp7;
407 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp8 + tmp9;
408 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2;
409 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=D_1*w24 + D_2*w23 + tmp10;
410 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3 + tmp4;
411 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2;
412 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp6 + tmp7;
413 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3 + tmp4;
414 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=D_0*w24 + D_3*w23 + tmp5;
415 }
416 }
417 } else { // constant data
418 for (index_t k=0; k<numEq; k++) {
419 for (index_t m=0; m<numComp; m++) {
420 const double D_0 = D_p[INDEX2(k, m, numEq)];
421 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16*D_0*w22;
422 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*D_0*w22;
423 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=8*D_0*w22;
424 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*D_0*w22;
425 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*D_0*w22;
426 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16*D_0*w22;
427 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*D_0*w22;
428 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=8*D_0*w22;
429 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=8*D_0*w22;
430 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*D_0*w22;
431 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16*D_0*w22;
432 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*D_0*w22;
433 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*D_0*w22;
434 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=8*D_0*w22;
435 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*D_0*w22;
436 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16*D_0*w22;
437 }
438 }
439 }
440 }
441 ///////////////
442 // process X //
443 ///////////////
444 if (!du.isEmpty()) {
445 addEM_F=true;
446 const double *du_p = du.getSampleDataRO(e);
447 const double *c11_p = c11.getSampleDataRO(e),
448 *c13_p = c13.getSampleDataRO(e),
449 *c33_p = c33.getSampleDataRO(e),
450 *c44_p = c44.getSampleDataRO(e);
451 if (du.actsExpanded()) {
452 const double X_00_0 = -(du_p[INDEX3(0,0,0,numEq,2)] * c11_p[0]
453 + du_p[INDEX3(1,1,0,numEq,2)] * c13_p[0]);
454 const double X_00_1 = -(du_p[INDEX3(0,0,1,numEq,2)] * c11_p[1]
455 + du_p[INDEX3(1,1,1,numEq,2)] * c13_p[1]);
456 const double X_00_2 = -(du_p[INDEX3(0,0,2,numEq,2)] * c11_p[2]
457 + du_p[INDEX3(1,1,2,numEq,2)] * c13_p[2]);
458 const double X_00_3 = -(du_p[INDEX3(0,0,3,numEq,2)] * c11_p[3]
459 + du_p[INDEX3(1,1,3,numEq,2)] * c13_p[3]);
460 const double X_11_0 = -(du_p[INDEX3(0,0,0,numEq,2)] * c13_p[0]
461 + du_p[INDEX3(1,1,0,numEq,2)] * c33_p[0]);
462 const double X_11_1 = -(du_p[INDEX3(0,0,1,numEq,2)] * c13_p[1]
463 + du_p[INDEX3(1,1,1,numEq,2)] * c33_p[1]);
464 const double X_11_2 = -(du_p[INDEX3(0,0,2,numEq,2)] * c13_p[2]
465 + du_p[INDEX3(1,1,2,numEq,2)] * c33_p[2]);
466 const double X_11_3 = -(du_p[INDEX3(0,0,3,numEq,2)] * c13_p[3]
467 + du_p[INDEX3(1,1,3,numEq,2)] * c33_p[3]);
468 const double X_01_0 = -(c44_p[0] *
469 (du_p[INDEX3(1,0,0,numEq,2)] + du_p[INDEX3(0,1,0,numEq,2)]));
470 const double X_01_1 = -(c44_p[1] *
471 (du_p[INDEX3(1,0,1,numEq,2)] + du_p[INDEX3(0,1,1,numEq,2)]));
472 const double X_01_2 = -(c44_p[2] *
473 (du_p[INDEX3(1,0,2,numEq,2)] + du_p[INDEX3(0,1,2,numEq,2)]));
474 const double X_01_3 = -(c44_p[3] *
475 (du_p[INDEX3(1,0,3,numEq,2)] + du_p[INDEX3(0,1,3,numEq,2)]));
476 const double X_10_0 = X_01_0, X_10_1 = X_01_1, X_10_2 = X_01_2, X_10_3 = X_01_3;
477
478 const double Atmp0 = 6*w15*(X_00_2 + X_00_3);
479 const double Atmp1 = 6*w10*(X_00_0 + X_00_1);
480 const double Atmp2 = 6*w11*(X_01_0 + X_01_2);
481 const double Atmp3 = 6*w14*(X_01_1 + X_01_3);
482 const double Atmp4 = 6*w11*(X_01_1 + X_01_3);
483 const double Atmp5 = w25*(X_00_0 + X_00_1);
484 const double Atmp6 = w26*(X_00_2 + X_00_3);
485 const double Atmp7 = 6*w14*(X_01_0 + X_01_2);
486 const double Atmp8 = w27*(X_01_0 + X_01_2);
487 const double Atmp9 = w28*(X_01_1 + X_01_3);
488 const double Atmp10 = w25*(-X_00_2 - X_00_3);
489 const double Atmp11 = w26*(-X_00_0 - X_00_1);
490 const double Atmp12 = w27*(X_01_1 + X_01_3);
491 const double Atmp13 = w28*(X_01_0 + X_01_2);
492 const double Atmp14 = w25*(X_00_2 + X_00_3);
493 const double Atmp15 = w26*(X_00_0 + X_00_1);
494 EM_F[INDEX2(0,0,numEq)]+=Atmp0 + Atmp1 + Atmp2 + Atmp3;
495 EM_F[INDEX2(0,1,numEq)]+=Atmp4 + Atmp5 + Atmp6 + Atmp7;
496 EM_F[INDEX2(0,2,numEq)]+=Atmp10 + Atmp11 + Atmp8 + Atmp9;
497 EM_F[INDEX2(0,3,numEq)]+=Atmp12 + Atmp13 + Atmp14 + Atmp15;
498 const double Btmp0 = 6*w15*(X_10_2 + X_10_3);
499 const double Btmp1 = 6*w10*(X_10_0 + X_10_1);
500 const double Btmp2 = 6*w11*(X_11_0 + X_11_2);
501 const double Btmp3 = 6*w14*(X_11_1 + X_11_3);
502 const double Btmp4 = 6*w11*(X_11_1 + X_11_3);
503 const double Btmp5 = w25*(X_10_0 + X_10_1);
504 const double Btmp6 = w26*(X_10_2 + X_10_3);
505 const double Btmp7 = 6*w14*(X_11_0 + X_11_2);
506 const double Btmp8 = w27*(X_11_0 + X_11_2);
507 const double Btmp9 = w28*(X_11_1 + X_11_3);
508 const double Btmp10 = w25*(-X_10_2 - X_10_3);
509 const double Btmp11 = w26*(-X_10_0 - X_10_1);
510 const double Btmp12 = w27*(X_11_1 + X_11_3);
511 const double Btmp13 = w28*(X_11_0 + X_11_2);
512 const double Btmp14 = w25*(X_10_2 + X_10_3);
513 const double Btmp15 = w26*(X_10_0 + X_10_1);
514 EM_F[INDEX2(1,0,numEq)]+=Btmp0 + Btmp1 + Btmp2 + Btmp3;
515 EM_F[INDEX2(1,1,numEq)]+=Btmp4 + Btmp5 + Btmp6 + Btmp7;
516 EM_F[INDEX2(1,2,numEq)]+=Btmp10 + Btmp11 + Btmp8 + Btmp9;
517 EM_F[INDEX2(1,3,numEq)]+=Btmp12 + Btmp13 + Btmp14 + Btmp15;
518 } else { // constant data
519
520 const double wX_00 = -(du_p[INDEX2(0,0,numEq)] * c11_p[0]
521 + du_p[INDEX2(1,1,numEq)] * c13_p[0])*w18;
522 const double wX_01 = -(c44_p[0] *
523 (du_p[INDEX2(1,0,numEq)] + du_p[INDEX2(0,1,numEq)]))*w19;
524 const double wX_10 = -(c44_p[0] *
525 (du_p[INDEX2(1,0,numEq)] + du_p[INDEX2(0,1,numEq)]))*w18;
526 const double wX_11 = -(du_p[INDEX2(0,0,numEq)] * c13_p[0]
527 + du_p[INDEX2(1,1,numEq)] * c33_p[0])*w19;
528 EM_F[INDEX2(0,0,numEq)]+= wX_00 + wX_01;
529 EM_F[INDEX2(0,1,numEq)]+=-wX_00 + wX_01;
530 EM_F[INDEX2(0,2,numEq)]+= wX_00 - wX_01;
531 EM_F[INDEX2(0,3,numEq)]+=-wX_00 - wX_01;
532 EM_F[INDEX2(1,0,numEq)]+= wX_10 + wX_11;
533 EM_F[INDEX2(1,1,numEq)]+=-wX_10 + wX_11;
534 EM_F[INDEX2(1,2,numEq)]+= wX_10 - wX_11;
535 EM_F[INDEX2(1,3,numEq)]+=-wX_10 - wX_11;
536 }
537 }
538 ///////////////
539 // process Y //
540 ///////////////
541 if (!Y.isEmpty()) {
542 addEM_F=true;
543 const double* Y_p=Y.getSampleDataRO(e);
544 if (Y.actsExpanded()) {
545 for (index_t k=0; k<numEq; k++) {
546 const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
547 const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
548 const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
549 const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
550 const double tmp0 = 6*w22*(Y_1 + Y_2);
551 const double tmp1 = 6*w22*(Y_0 + Y_3);
552 EM_F[INDEX2(k,0,numEq)]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
553 EM_F[INDEX2(k,1,numEq)]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
554 EM_F[INDEX2(k,2,numEq)]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
555 EM_F[INDEX2(k,3,numEq)]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
556 }
557 } else { // constant data
558 for (index_t k=0; k<numEq; k++) {
559 EM_F[INDEX2(k,0,numEq)]+=36*Y_p[k]*w22;
560 EM_F[INDEX2(k,1,numEq)]+=36*Y_p[k]*w22;
561 EM_F[INDEX2(k,2,numEq)]+=36*Y_p[k]*w22;
562 EM_F[INDEX2(k,3,numEq)]+=36*Y_p[k]*w22;
563 }
564 }
565 }
566
567 // add to matrix (if addEM_S) and RHS (if addEM_F)
568 const index_t firstNode=m_NN[0]*k1+k0;
569 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
570 addEM_F, firstNode, numEq, numComp);
571 } // end k0 loop
572 } // end k1 loop
573 } // end of colouring
574 } // end of parallel region
575 }
576
577 }
578

  ViewVC Help
Powered by ViewVC 1.1.26