/[escript]/trunk/ripley/src/LameAssembler2D.cpp
ViewVC logotype

Contents of /trunk/ripley/src/LameAssembler2D.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File size: 66472 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 #include <ripley/LameAssembler2D.h>
17 #include <ripley/domainhelpers.h>
18
19 using namespace std;
20
21 using escript::AbstractSystemMatrix;
22 using escript::Data;
23
24 namespace ripley {
25
26 void LameAssembler2D::collateFunctionSpaceTypes(vector<int>& fsTypes,
27 const DataMap& coefs) const
28 {
29 if (isNotEmpty("lame_mu", coefs))
30 fsTypes.push_back(coefs.find("lame_mu")->second.getFunctionSpace().getTypeCode());
31 if (isNotEmpty("lame_lambda", coefs))
32 fsTypes.push_back(coefs.find("lame_lambda")->second.getFunctionSpace().getTypeCode());
33 if (isNotEmpty("B", coefs))
34 fsTypes.push_back(coefs.find("B")->second.getFunctionSpace().getTypeCode());
35 if (isNotEmpty("C", coefs))
36 fsTypes.push_back(coefs.find("C")->second.getFunctionSpace().getTypeCode());
37 if (isNotEmpty("D", coefs))
38 fsTypes.push_back(coefs.find("D")->second.getFunctionSpace().getTypeCode());
39 if (isNotEmpty("X", coefs))
40 fsTypes.push_back(coefs.find("X")->second.getFunctionSpace().getTypeCode());
41 if (isNotEmpty("Y", coefs))
42 fsTypes.push_back(coefs.find("Y")->second.getFunctionSpace().getTypeCode());
43 }
44
45 void LameAssembler2D::assemblePDESingle(AbstractSystemMatrix* mat, Data& rhs,
46 const DataMap& coefs) const
47 {
48 throw RipleyException("assemblePDESingle not implemented in LameAssembler2D");
49 }
50
51 void LameAssembler2D::assemblePDEBoundarySingle(AbstractSystemMatrix* mat,
52 Data& rhs, const DataMap& coefs) const
53 {
54 throw RipleyException("assemblePDEBoundarySingle not implemented in LameAssembler2D");
55 }
56
57 void LameAssembler2D::assemblePDESingleReduced(AbstractSystemMatrix* mat,
58 Data& rhs, const DataMap& coefs) const
59 {
60 throw RipleyException("assemblePDESingleReduced not implemented in LameAssembler2D");
61 }
62
63 void LameAssembler2D::assemblePDEBoundarySingleReduced(
64 AbstractSystemMatrix* mat, Data& rhs,
65 const DataMap& coefs) const
66 {
67 throw RipleyException("assemblePDEBoundarySingleReduced not implemented in LameAssembler2D");
68 }
69
70 void LameAssembler2D::assemblePDESystemReduced(AbstractSystemMatrix* mat,
71 Data& rhs, const DataMap& coefs) const
72 {
73 throw RipleyException("assemblePDEBoundarySystem not implemented in LameAssembler2D");
74 }
75
76 void LameAssembler2D::assemblePDEBoundarySystemReduced(
77 AbstractSystemMatrix* mat,
78 Data& rhs, const DataMap& coefs) const
79 {
80 throw RipleyException("assemblePDEBoundarySystemReduced not implemented in LameAssembler2D");
81 }
82
83 void LameAssembler2D::assemblePDEBoundarySystem(AbstractSystemMatrix* mat,
84 Data& rhs, const DataMap& coefs) const
85 {
86 const Data& d = unpackData("d", coefs);
87 const Data& y = unpackData("y", coefs);
88 dim_t numEq, numComp;
89 if (!mat) {
90 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
91 } else {
92 numEq=mat->getRowBlockSize();
93 numComp=mat->getColumnBlockSize();
94 }
95 const double SQRT3 = 1.73205080756887719318;
96 const double w5 = m_dx[0]/12;
97 const double w6 = w5*(SQRT3 + 2);
98 const double w7 = w5*(-SQRT3 + 2);
99 const double w8 = w5*(SQRT3 + 3);
100 const double w9 = w5*(-SQRT3 + 3);
101 const double w2 = m_dx[1]/12;
102 const double w0 = w2*(SQRT3 + 2);
103 const double w1 = w2*(-SQRT3 + 2);
104 const double w3 = w2*(SQRT3 + 3);
105 const double w4 = w2*(-SQRT3 + 3);
106 const bool addEM_S=!d.isEmpty();
107 const bool addEM_F=!y.isEmpty();
108 rhs.requireWrite();
109 #pragma omp parallel
110 {
111 if (domain->m_faceOffset[0] > -1) {
112 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
113 #pragma omp for
114 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
115 vector<double> EM_S(4*4*numEq*numComp, 0);
116 vector<double> EM_F(4*numEq, 0);
117 const index_t e = k1;
118 ///////////////
119 // process d //
120 ///////////////
121 if (addEM_S) {
122 const double* d_p=d.getSampleDataRO(e);
123 if (d.actsExpanded()) {
124 for (index_t k=0; k<numEq; k++) {
125 for (index_t m=0; m<numComp; m++) {
126 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
127 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
128 const double tmp0 = w2*(d_0 + d_1);
129 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
130 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0;
131 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0;
132 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
133 }
134 }
135 } else { // constant data
136 for (index_t k=0; k<numEq; k++) {
137 for (index_t m=0; m<numComp; m++) {
138 const double d_0 = d_p[INDEX2(k, m, numEq)];
139 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w2;
140 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=2*d_0*w2;
141 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=2*d_0*w2;
142 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w2;
143 }
144 }
145 }
146 }
147 ///////////////
148 // process y //
149 ///////////////
150 if (addEM_F) {
151 const double* y_p=y.getSampleDataRO(e);
152 if (y.actsExpanded()) {
153 for (index_t k=0; k<numEq; k++) {
154 const double y_0 = y_p[INDEX2(k, 0, numEq)];
155 const double y_1 = y_p[INDEX2(k, 1, numEq)];
156 EM_F[INDEX2(k,0,numEq)]+=w3*y_0 + w4*y_1;
157 EM_F[INDEX2(k,2,numEq)]+=w3*y_1 + w4*y_0;
158 }
159 } else { // constant data
160 for (index_t k=0; k<numEq; k++) {
161 EM_F[INDEX2(k,0,numEq)]+=6*w2*y_p[k];
162 EM_F[INDEX2(k,2,numEq)]+=6*w2*y_p[k];
163 }
164 }
165 }
166 const index_t firstNode=m_NN[0]*k1;
167 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
168 firstNode, numEq, numComp);
169 }
170 } // end colouring
171 }
172
173 if (domain->m_faceOffset[1] > -1) {
174 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
175 #pragma omp for
176 for (index_t k1=k1_0; k1<m_NE[1]; k1+=2) {
177 vector<double> EM_S(4*4*numEq*numComp, 0);
178 vector<double> EM_F(4*numEq, 0);
179 const index_t e = domain->m_faceOffset[1]+k1;
180 ///////////////
181 // process d //
182 ///////////////
183 if (addEM_S) {
184 const double* d_p=d.getSampleDataRO(e);
185 if (d.actsExpanded()) {
186 for (index_t k=0; k<numEq; k++) {
187 for (index_t m=0; m<numComp; m++) {
188 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
189 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
190 const double tmp0 = w2*(d_0 + d_1);
191 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w0 + d_1*w1;
192 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0;
193 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0;
194 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w1 + d_1*w0;
195 }
196 }
197 } else { // constant data
198 for (index_t k=0; k<numEq; k++) {
199 for (index_t m=0; m<numComp; m++) {
200 const double d_0 = d_p[INDEX2(k, m, numEq)];
201 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w2;
202 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=2*d_0*w2;
203 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=2*d_0*w2;
204 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w2;
205 }
206 }
207 }
208 }
209 ///////////////
210 // process y //
211 ///////////////
212 if (addEM_F) {
213 const double* y_p=y.getSampleDataRO(e);
214 if (y.actsExpanded()) {
215 for (index_t k=0; k<numEq; k++) {
216 const double y_0 = y_p[INDEX2(k, 0, numEq)];
217 const double y_1 = y_p[INDEX2(k, 1, numEq)];
218 EM_F[INDEX2(k,1,numEq)]+=w3*y_0 + w4*y_1;
219 EM_F[INDEX2(k,3,numEq)]+=w3*y_1 + w4*y_0;
220 }
221 } else { // constant data
222 for (index_t k=0; k<numEq; k++) {
223 EM_F[INDEX2(k,1,numEq)]+=6*w2*y_p[k];
224 EM_F[INDEX2(k,3,numEq)]+=6*w2*y_p[k];
225 }
226 }
227 }
228 const index_t firstNode=m_NN[0]*(k1+1)-2;
229 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
230 firstNode, numEq, numComp);
231 }
232 } // end colouring
233 }
234
235 if (domain->m_faceOffset[2] > -1) {
236 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
237 #pragma omp for
238 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
239 vector<double> EM_S(4*4*numEq*numComp, 0);
240 vector<double> EM_F(4*numEq, 0);
241 const index_t e = domain->m_faceOffset[2]+k0;
242 ///////////////
243 // process d //
244 ///////////////
245 if (addEM_S) {
246 const double* d_p=d.getSampleDataRO(e);
247 if (d.actsExpanded()) {
248 for (index_t k=0; k<numEq; k++) {
249 for (index_t m=0; m<numComp; m++) {
250 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
251 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
252 const double tmp0 = w5*(d_0 + d_1);
253 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
254 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0;
255 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0;
256 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
257 }
258 }
259 } else { // constant data
260 for (index_t k=0; k<numEq; k++) {
261 for (index_t m=0; m<numComp; m++) {
262 const double d_0 = d_p[INDEX2(k, m, numEq)];
263 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=4*d_0*w5;
264 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=2*d_0*w5;
265 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=2*d_0*w5;
266 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=4*d_0*w5;
267 }
268 }
269 }
270 }
271 ///////////////
272 // process y //
273 ///////////////
274 if (addEM_F) {
275 const double* y_p=y.getSampleDataRO(e);
276 if (y.actsExpanded()) {
277 for (index_t k=0; k<numEq; k++) {
278 const double y_0 = y_p[INDEX2(k, 0, numEq)];
279 const double y_1 = y_p[INDEX2(k, 1, numEq)];
280 EM_F[INDEX2(k,0,numEq)]+=w8*y_0 + w9*y_1;
281 EM_F[INDEX2(k,1,numEq)]+=w8*y_1 + w9*y_0;
282 }
283 } else { // constant data
284 for (index_t k=0; k<numEq; k++) {
285 EM_F[INDEX2(k,0,numEq)]+=6*w5*y_p[k];
286 EM_F[INDEX2(k,1,numEq)]+=6*w5*y_p[k];
287 }
288 }
289 }
290 const index_t firstNode=k0;
291 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
292 firstNode, numEq, numComp);
293 }
294 } // end colouring
295 }
296
297 if (domain->m_faceOffset[3] > -1) {
298 for (index_t k0_0=0; k0_0<2; k0_0++) { // colouring
299 #pragma omp for
300 for (index_t k0 = k0_0; k0 < m_NE[0]; k0+=2) {
301 vector<double> EM_S(4*4*numEq*numComp, 0);
302 vector<double> EM_F(4*numEq, 0);
303 const index_t e = domain->m_faceOffset[3]+k0;
304 ///////////////
305 // process d //
306 ///////////////
307 if (addEM_S) {
308 const double* d_p=d.getSampleDataRO(e);
309 if (d.actsExpanded()) {
310 for (index_t k=0; k<numEq; k++) {
311 for (index_t m=0; m<numComp; m++) {
312 const double d_0 = d_p[INDEX3(k,m,0,numEq,numComp)];
313 const double d_1 = d_p[INDEX3(k,m,1,numEq,numComp)];
314 const double tmp0 = w5*(d_0 + d_1);
315 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=d_0*w6 + d_1*w7;
316 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0;
317 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0;
318 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=d_0*w7 + d_1*w6;
319 }
320 }
321 } else { // constant data
322 for (index_t k=0; k<numEq; k++) {
323 for (index_t m=0; m<numComp; m++) {
324 const double d_0 = d_p[INDEX2(k, m, numEq)];
325 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=4*d_0*w5;
326 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=2*d_0*w5;
327 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=2*d_0*w5;
328 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=4*d_0*w5;
329 }
330 }
331 }
332 }
333 ///////////////
334 // process y //
335 ///////////////
336 if (addEM_F) {
337 const double* y_p=y.getSampleDataRO(e);
338 if (y.actsExpanded()) {
339 for (index_t k=0; k<numEq; k++) {
340 const double y_0 = y_p[INDEX2(k, 0, numEq)];
341 const double y_1 = y_p[INDEX2(k, 1, numEq)];
342 EM_F[INDEX2(k,2,numEq)]+=w8*y_0 + w9*y_1;
343 EM_F[INDEX2(k,3,numEq)]+=w8*y_1 + w9*y_0;
344 }
345 } else { // constant data
346 for (index_t k=0; k<numEq; k++) {
347 EM_F[INDEX2(k,2,numEq)]+=6*w5*y_p[k];
348 EM_F[INDEX2(k,3,numEq)]+=6*w5*y_p[k];
349 }
350 }
351 }
352 const index_t firstNode=m_NN[0]*(m_NN[1]-2)+k0;
353 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S, addEM_F,
354 firstNode, numEq, numComp);
355 }
356 } // end colouring
357 }
358 } // end of parallel section
359 }
360
361 void LameAssembler2D::assemblePDESystem(AbstractSystemMatrix* mat,
362 Data& rhs, const DataMap& coefs) const
363 {
364 if (isNotEmpty("A", coefs))
365 throw RipleyException("Coefficient A was given to LameAssembler "
366 "unexpectedly. Specialised domains can't be used for general "
367 "assemblage.");
368 const Data& lambda = unpackData("lame_lambda", coefs);
369 const Data& mu = unpackData("lame_mu", coefs);
370 const Data& B = unpackData("B", coefs);
371 const Data& C = unpackData("C", coefs);
372 const Data& D = unpackData("D", coefs);
373 const Data& X = unpackData("X", coefs);
374 const Data& Y = unpackData("Y", coefs);
375 dim_t numEq, numComp;
376 if (!mat)
377 numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize());
378 else {
379 numEq=mat->getRowBlockSize();
380 numComp=mat->getColumnBlockSize();
381 }
382 const double SQRT3 = 1.73205080756887719318;
383 const double w1 = 1.0/24;
384 const double w5 = -SQRT3/24 + 1.0/12;
385 const double w2 = -SQRT3/24 - 1.0/12;
386 const double w19 = -m_dx[0]/12;
387 const double w11 = w19*(SQRT3 + 3)/12;
388 const double w14 = w19*(-SQRT3 + 3)/12;
389 const double w16 = w19*(5*SQRT3 + 9)/12;
390 const double w17 = w19*(-5*SQRT3 + 9)/12;
391 const double w27 = w19*(-SQRT3 - 3)/2;
392 const double w28 = w19*(SQRT3 - 3)/2;
393 const double w18 = -m_dx[1]/12;
394 const double w10 = w18*(SQRT3 + 3)/12;
395 const double w15 = w18*(-SQRT3 + 3)/12;
396 const double w12 = w18*(5*SQRT3 + 9)/12;
397 const double w13 = w18*(-5*SQRT3 + 9)/12;
398 const double w25 = w18*(-SQRT3 - 3)/2;
399 const double w26 = w18*(SQRT3 - 3)/2;
400 const double w22 = m_dx[0]*m_dx[1]/144;
401 const double w20 = w22*(SQRT3 + 2);
402 const double w21 = w22*(-SQRT3 + 2);
403 const double w23 = w22*(4*SQRT3 + 7);
404 const double w24 = w22*(-4*SQRT3 + 7);
405 const double w3 = m_dx[0]/(24*m_dx[1]);
406 const double w7 = w3*(SQRT3 + 2);
407 const double w8 = w3*(-SQRT3 + 2);
408 const double w6 = -m_dx[1]/(24*m_dx[0]);
409 const double w0 = w6*(SQRT3 + 2);
410 const double w4 = w6*(-SQRT3 + 2);
411
412 rhs.requireWrite();
413 #pragma omp parallel
414 {
415 for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
416 #pragma omp for
417 for (index_t k1=k1_0; k1 < m_NE[1]; k1+=2) {
418 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
419 bool addEM_S=false;
420 bool addEM_F=false;
421 vector<double> EM_S(4*4*numEq*numComp, 0);
422 vector<double> EM_F(4*numEq, 0);
423 const index_t e = k0 + m_NE[0]*k1;
424 ///////////////
425 // process A //
426 ///////////////
427 if (!mu.isEmpty() || !lambda.isEmpty()) {
428 addEM_S = true;
429 if (mu.actsExpanded() || lambda.actsExpanded()) {
430 double A_0000[4] = {0};
431 double A_0011[4] = {0};
432 double A_0101[4] = {0};
433 double A_0110[4] = {0};
434 double A_1001[4] = {0};
435 double A_1010[4] = {0};
436 double A_1100[4] = {0};
437 double A_1111[4] = {0};
438 if (!mu.isEmpty()) {
439 const double *mu_p = mu.getSampleDataRO(e);
440 A_0000[0] += 2*mu_p[0];
441 A_0000[1] += 2*mu_p[1];
442 A_0000[2] += 2*mu_p[2];
443 A_0000[3] += 2*mu_p[3];
444 A_0110[0] += mu_p[0];
445 A_0101[0] += mu_p[0];
446 A_0110[1] += mu_p[1];
447 A_0101[1] += mu_p[1];
448 A_0110[2] += mu_p[2];
449 A_0101[2] += mu_p[2];
450 A_0110[3] += mu_p[3];
451 A_0101[3] += mu_p[3];
452 A_1001[0] += mu_p[0];
453 A_1010[0] += mu_p[0];
454 A_1001[1] += mu_p[1];
455 A_1010[1] += mu_p[1];
456 A_1001[2] += mu_p[2];
457 A_1010[2] += mu_p[2];
458 A_1001[3] += mu_p[3];
459 A_1010[3] += mu_p[3];
460 A_1111[0] += 2*mu_p[0];
461 A_1111[1] += 2*mu_p[1];
462 A_1111[2] += 2*mu_p[2];
463 A_1111[3] += 2*mu_p[3];
464 }
465 if (!lambda.isEmpty()) {
466 const double *lambda_p = lambda.getSampleDataRO(e);
467 A_0000[0] += lambda_p[0];
468 A_0000[1] += lambda_p[1];
469 A_0000[2] += lambda_p[2];
470 A_0000[3] += lambda_p[3];
471 A_0011[0] += lambda_p[0];
472 A_0011[1] += lambda_p[1];
473 A_0011[2] += lambda_p[2];
474 A_0011[3] += lambda_p[3];
475 A_1100[0] += lambda_p[0];
476 A_1100[1] += lambda_p[1];
477 A_1100[2] += lambda_p[2];
478 A_1100[3] += lambda_p[3];
479 A_1111[0] += lambda_p[0];
480 A_1111[1] += lambda_p[1];
481 A_1111[2] += lambda_p[2];
482 A_1111[3] += lambda_p[3];
483 }
484 {
485 const double tmp0 = w3*(A_0101[0] + A_0101[1] + A_0101[2] + A_0101[3]);
486 const double tmp2 = w4*(A_0000[2] + A_0000[3]);
487 const double tmp3 = w0*(A_0000[0] + A_0000[1]);
488 const double tmp7 = w3*(-A_0101[0] - A_0101[1] - A_0101[2] - A_0101[3]);
489 const double tmp8 = w6*(A_0000[0] + A_0000[1] + A_0000[2] + A_0000[3]);
490 const double tmp11 = w4*(A_0000[0] + A_0000[1]);
491 const double tmp12 = w0*(A_0000[2] + A_0000[3]);
492 const double tmp15 = w7*(A_0101[0] + A_0101[2]);
493 const double tmp16 = w4*(-A_0000[2] - A_0000[3]);
494 const double tmp17 = w0*(-A_0000[0] - A_0000[1]);
495 const double tmp19 = w8*(A_0101[1] + A_0101[3]);
496 const double tmp21 = w7*(A_0101[1] + A_0101[3]);
497 const double tmp22 = w4*(-A_0000[0] - A_0000[1]);
498 const double tmp23 = w0*(-A_0000[2] - A_0000[3]);
499 const double tmp25 = w8*(A_0101[0] + A_0101[2]);
500 const double tmp30 = w7*(-A_0101[1] - A_0101[3]);
501 const double tmp33 = w8*(-A_0101[0] - A_0101[2]);
502 const double tmp34 = w6*(-A_0000[0] - A_0000[1] - A_0000[2] - A_0000[3]);
503 const double tmp38 = w7*(-A_0101[0] - A_0101[2]);
504 const double tmp40 = w8*(-A_0101[1] - A_0101[3]);
505 EM_S[INDEX4(0,0,0,0,numEq,numComp,4)]+= tmp15 + tmp16 + tmp17 + tmp19;
506 EM_S[INDEX4(0,0,0,1,numEq,numComp,4)]+= tmp0 + tmp2 + tmp3;
507 EM_S[INDEX4(0,0,0,2,numEq,numComp,4)]+= tmp34 + tmp38 + tmp40;
508 EM_S[INDEX4(0,0,0,3,numEq,numComp,4)]+= tmp7 + tmp8;
509 EM_S[INDEX4(0,0,1,0,numEq,numComp,4)]+= tmp0 + tmp2 + tmp3;
510 EM_S[INDEX4(0,0,1,1,numEq,numComp,4)]+= tmp16 + tmp17 + tmp21 + tmp25;
511 EM_S[INDEX4(0,0,1,2,numEq,numComp,4)]+= tmp7 + tmp8;
512 EM_S[INDEX4(0,0,1,3,numEq,numComp,4)]+= tmp30 + tmp33 + tmp34;
513 EM_S[INDEX4(0,0,2,0,numEq,numComp,4)]+= tmp34 + tmp38 + tmp40;
514 EM_S[INDEX4(0,0,2,1,numEq,numComp,4)]+= tmp7 + tmp8;
515 EM_S[INDEX4(0,0,2,2,numEq,numComp,4)]+= tmp15 + tmp19 + tmp22 + tmp23;
516 EM_S[INDEX4(0,0,2,3,numEq,numComp,4)]+= tmp0 + tmp11 + tmp12;
517 EM_S[INDEX4(0,0,3,0,numEq,numComp,4)]+= tmp7 + tmp8;
518 EM_S[INDEX4(0,0,3,1,numEq,numComp,4)]+= tmp30 + tmp33 + tmp34;
519 EM_S[INDEX4(0,0,3,2,numEq,numComp,4)]+= tmp0 + tmp11 + tmp12;
520 EM_S[INDEX4(0,0,3,3,numEq,numComp,4)]+= tmp21 + tmp22 + tmp23 + tmp25;
521 }
522 {
523 const double tmp1 = w1*(A_0011[0] + A_0011[3] - A_0110[1] - A_0110[2]);
524 const double tmp9 = w1*(A_0011[1] + A_0011[2] + A_0110[1] + A_0110[2]);
525 const double tmp28 = w1*(-A_0011[0] - A_0011[3] - A_0110[0] - A_0110[3]);
526 const double tmp31 = w1*(-A_0011[1] - A_0011[2] + A_0110[0] + A_0110[3]);
527 EM_S[INDEX4(0,1,0,0,numEq,numComp,4)]+= w5*(A_0011[3] + A_0110[3]) + w2*(-A_0011[0] - A_0110[0]) + tmp9;
528 EM_S[INDEX4(0,1,0,1,numEq,numComp,4)]+= tmp1 + w5*(A_0011[2] - A_0110[3]) + w2*(-A_0011[1] + A_0110[0]);
529 EM_S[INDEX4(0,1,0,2,numEq,numComp,4)]+= tmp31 + w5*(-A_0011[3] + A_0110[1]) + w2*(A_0011[0] - A_0110[2]);
530 EM_S[INDEX4(0,1,0,3,numEq,numComp,4)]+= tmp28 + w5*(-A_0011[2] - A_0110[1]) + w2*(A_0011[1] + A_0110[2]);
531 EM_S[INDEX4(0,1,1,0,numEq,numComp,4)]+= tmp31 + w5*(-A_0011[3] + A_0110[2]) + w2*(A_0011[0] - A_0110[1]);
532 EM_S[INDEX4(0,1,1,1,numEq,numComp,4)]+= tmp28 + w5*(-A_0011[2] - A_0110[2]) + w2*(A_0011[1] + A_0110[1]);
533 EM_S[INDEX4(0,1,1,2,numEq,numComp,4)]+= w2*(-A_0011[0] - A_0110[3]) + w5*(A_0011[3] + A_0110[0]) + tmp9;
534 EM_S[INDEX4(0,1,1,3,numEq,numComp,4)]+= tmp1 + w5*(A_0011[2] - A_0110[0]) + w2*(-A_0011[1] + A_0110[3]);
535 EM_S[INDEX4(0,1,2,0,numEq,numComp,4)]+= tmp1 + w5*(A_0011[1] - A_0110[3]) + w2*(-A_0011[2] + A_0110[0]);
536 EM_S[INDEX4(0,1,2,1,numEq,numComp,4)]+= w5*(A_0011[0] + A_0110[3]) + w2*(-A_0011[3] - A_0110[0]) + tmp9;
537 EM_S[INDEX4(0,1,2,2,numEq,numComp,4)]+= tmp28 + w5*(-A_0011[1] - A_0110[1]) + w2*(A_0011[2] + A_0110[2]);
538 EM_S[INDEX4(0,1,2,3,numEq,numComp,4)]+= tmp31 + w5*(-A_0011[0] + A_0110[1]) + w2*(A_0011[3] - A_0110[2]);
539 EM_S[INDEX4(0,1,3,0,numEq,numComp,4)]+= w5*(-A_0011[1] - A_0110[2]) + tmp28 + w2*(A_0011[2] + A_0110[1]);
540 EM_S[INDEX4(0,1,3,1,numEq,numComp,4)]+= tmp31 + w5*(-A_0011[0] + A_0110[2]) + w2*(A_0011[3] - A_0110[1]);
541 EM_S[INDEX4(0,1,3,2,numEq,numComp,4)]+= tmp1 + w5*(A_0011[1] - A_0110[0]) + w2*(-A_0011[2] + A_0110[3]);
542 EM_S[INDEX4(0,1,3,3,numEq,numComp,4)]+= w5*(A_0011[0] + A_0110[0]) + w2*(-A_0011[3] - A_0110[3]) + tmp9;
543 }
544 {
545 const double tmp1 = w1*(A_1001[0] + A_1001[3] - A_1100[1] - A_1100[2]);
546 const double tmp9 = w1*(A_1001[1] + A_1001[2] + A_1100[1] + A_1100[2]);
547 const double tmp28 = w1*(-A_1001[0] - A_1001[3] - A_1100[0] - A_1100[3]);
548 const double tmp31 = w1*(-A_1001[1] - A_1001[2] + A_1100[0] + A_1100[3]);
549 EM_S[INDEX4(1,0,0,0,numEq,numComp,4)]+= w5*(A_1001[3] + A_1100[3]) + w2*(-A_1001[0] - A_1100[0]) + tmp9;
550 EM_S[INDEX4(1,0,0,1,numEq,numComp,4)]+= tmp1 + w5*(A_1001[2] - A_1100[3]) + w2*(-A_1001[1] + A_1100[0]);
551 EM_S[INDEX4(1,0,0,2,numEq,numComp,4)]+= tmp31 + w5*(-A_1001[3] + A_1100[1]) + w2*(A_1001[0] - A_1100[2]);
552 EM_S[INDEX4(1,0,0,3,numEq,numComp,4)]+= tmp28 + w5*(-A_1001[2] - A_1100[1]) + w2*(A_1001[1] + A_1100[2]);
553 EM_S[INDEX4(1,0,1,0,numEq,numComp,4)]+= tmp31 + w5*(-A_1001[3] + A_1100[2]) + w2*(A_1001[0] - A_1100[1]);
554 EM_S[INDEX4(1,0,1,1,numEq,numComp,4)]+= tmp28 + w5*(-A_1001[2] - A_1100[2]) + w2*(A_1001[1] + A_1100[1]);
555 EM_S[INDEX4(1,0,1,2,numEq,numComp,4)]+= w2*(-A_1001[0] - A_1100[3]) + w5*(A_1001[3] + A_1100[0]) + tmp9;
556 EM_S[INDEX4(1,0,1,3,numEq,numComp,4)]+= tmp1 + w5*(A_1001[2] - A_1100[0]) + w2*(-A_1001[1] + A_1100[3]);
557 EM_S[INDEX4(1,0,2,0,numEq,numComp,4)]+= tmp1 + w5*(A_1001[1] - A_1100[3]) + w2*(-A_1001[2] + A_1100[0]);
558 EM_S[INDEX4(1,0,2,1,numEq,numComp,4)]+= w5*(A_1001[0] + A_1100[3]) + w2*(-A_1001[3] - A_1100[0]) + tmp9;
559 EM_S[INDEX4(1,0,2,2,numEq,numComp,4)]+= tmp28 + w5*(-A_1001[1] - A_1100[1]) + w2*(A_1001[2] + A_1100[2]);
560 EM_S[INDEX4(1,0,2,3,numEq,numComp,4)]+= tmp31 + w5*(-A_1001[0] + A_1100[1]) + w2*(A_1001[3] - A_1100[2]);
561 EM_S[INDEX4(1,0,3,0,numEq,numComp,4)]+= w5*(-A_1001[1] - A_1100[2]) + tmp28 + w2*(A_1001[2] + A_1100[1]);
562 EM_S[INDEX4(1,0,3,1,numEq,numComp,4)]+= tmp31 + w5*(-A_1001[0] + A_1100[2]) + w2*(A_1001[3] - A_1100[1]);
563 EM_S[INDEX4(1,0,3,2,numEq,numComp,4)]+= tmp1 + w5*(A_1001[1] - A_1100[0]) + w2*(-A_1001[2] + A_1100[3]);
564 EM_S[INDEX4(1,0,3,3,numEq,numComp,4)]+= w5*(A_1001[0] + A_1100[0]) + w2*(-A_1001[3] - A_1100[3]) + tmp9;
565 }
566 {
567 const double tmp0 = w3*(A_1111[0] + A_1111[1] + A_1111[2] + A_1111[3]);
568 const double tmp2 = w4*(A_1010[2] + A_1010[3]);
569 const double tmp3 = w0*(A_1010[0] + A_1010[1]);
570 const double tmp7 = w3*(-A_1111[0] - A_1111[1] - A_1111[2] - A_1111[3]);
571 const double tmp8 = w6*(A_1010[0] + A_1010[1] + A_1010[2] + A_1010[3]);
572 const double tmp11 = w4*(A_1010[0] + A_1010[1]);
573 const double tmp12 = w0*(A_1010[2] + A_1010[3]);
574 const double tmp15 = w7*(A_1111[0] + A_1111[2]);
575 const double tmp16 = w4*(-A_1010[2] - A_1010[3]);
576 const double tmp17 = w0*(-A_1010[0] - A_1010[1]);
577 const double tmp19 = w8*(A_1111[1] + A_1111[3]);
578 const double tmp21 = w7*(A_1111[1] + A_1111[3]);
579 const double tmp22 = w4*(-A_1010[0] - A_1010[1]);
580 const double tmp23 = w0*(-A_1010[2] - A_1010[3]);
581 const double tmp25 = w8*(A_1111[0] + A_1111[2]);
582 const double tmp30 = w7*(-A_1111[1] - A_1111[3]);
583 const double tmp33 = w8*(-A_1111[0] - A_1111[2]);
584 const double tmp34 = w6*(-A_1010[0] - A_1010[1] - A_1010[2] - A_1010[3]);
585 const double tmp38 = w7*(-A_1111[0] - A_1111[2]);
586 const double tmp40 = w8*(-A_1111[1] - A_1111[3]);
587 EM_S[INDEX4(1,1,0,0,numEq,numComp,4)]+= tmp15 + tmp16 + tmp17 + tmp19;
588 EM_S[INDEX4(1,1,0,1,numEq,numComp,4)]+= tmp0 + tmp2 + tmp3;
589 EM_S[INDEX4(1,1,0,2,numEq,numComp,4)]+= tmp34 + tmp38 + tmp40;
590 EM_S[INDEX4(1,1,0,3,numEq,numComp,4)]+= tmp7 + tmp8;
591 EM_S[INDEX4(1,1,1,0,numEq,numComp,4)]+= tmp0 + tmp2 + tmp3;
592 EM_S[INDEX4(1,1,1,1,numEq,numComp,4)]+= tmp16 + tmp17 + tmp21 + tmp25;
593 EM_S[INDEX4(1,1,1,2,numEq,numComp,4)]+= tmp7 + tmp8;
594 EM_S[INDEX4(1,1,1,3,numEq,numComp,4)]+= tmp30 + tmp33 + tmp34;
595 EM_S[INDEX4(1,1,2,0,numEq,numComp,4)]+= tmp34 + tmp38 + tmp40;
596 EM_S[INDEX4(1,1,2,1,numEq,numComp,4)]+= tmp7 + tmp8;
597 EM_S[INDEX4(1,1,2,2,numEq,numComp,4)]+= tmp15 + tmp19 + tmp22 + tmp23;
598 EM_S[INDEX4(1,1,2,3,numEq,numComp,4)]+= tmp0 + tmp11 + tmp12;
599 EM_S[INDEX4(1,1,3,0,numEq,numComp,4)]+= tmp7 + tmp8;
600 EM_S[INDEX4(1,1,3,1,numEq,numComp,4)]+= tmp30 + tmp33 + tmp34;
601 EM_S[INDEX4(1,1,3,2,numEq,numComp,4)]+= tmp0 + tmp11 + tmp12;
602 EM_S[INDEX4(1,1,3,3,numEq,numComp,4)]+= tmp21 + tmp22 + tmp23 + tmp25;
603 }
604 } else { // constant data
605 double A_0000 = 0;
606 double A_0011 = 0;
607 double A_0101 = 0;
608 double A_0110 = 0;
609 double A_1001 = 0;
610 double A_1010 = 0;
611 double A_1100 = 0;
612 double A_1111 = 0;
613 if (!mu.isEmpty()) {
614 const double *mu_p = mu.getSampleDataRO(e);
615 A_0000 += 2*mu_p[0];
616 A_0110 += mu_p[0];
617 A_0101 += mu_p[0];
618 A_1001 += mu_p[0];
619 A_1010 += mu_p[0];
620 A_1111 += 2*mu_p[0];
621 }
622 if (!lambda.isEmpty()) {
623 const double *lambda_p = lambda.getSampleDataRO(e);
624 A_0000 += lambda_p[0];
625 A_0011 += lambda_p[0];
626 A_1100 += lambda_p[0];
627 A_1111 += lambda_p[0];
628 }
629
630 const double tmp01_0 = 6*w1*(A_0011 - A_0110);
631 const double tmp01_1 = 6*w1*(A_0011 + A_0110);
632 const double tmp01_2 = 6*w1*(-A_0011 - A_0110);
633 const double tmp01_3 = 6*w1*(-A_0011 + A_0110);
634 const double tmp10_0 = 6*w1*(A_1001 - A_1100);
635 const double tmp10_1 = 6*w1*(A_1001 + A_1100);
636 const double tmp10_2 = 6*w1*(-A_1001 - A_1100);
637 const double tmp10_3 = 6*w1*(-A_1001 + A_1100);
638 EM_S[INDEX4(0,0,0,0,numEq,numComp,4)]+=-8*A_0000*w6 + 8*A_0101*w3;
639 EM_S[INDEX4(0,0,0,1,numEq,numComp,4)]+= 8*A_0000*w6 + 4*A_0101*w3;
640 EM_S[INDEX4(0,0,0,2,numEq,numComp,4)]+=-4*A_0000*w6 - 8*A_0101*w3;
641 EM_S[INDEX4(0,0,0,3,numEq,numComp,4)]+= 4*A_0000*w6 - 4*A_0101*w3;
642 EM_S[INDEX4(0,0,1,0,numEq,numComp,4)]+= 8*A_0000*w6 + 4*A_0101*w3;
643 EM_S[INDEX4(0,0,1,1,numEq,numComp,4)]+=-8*A_0000*w6 + 8*A_0101*w3;
644 EM_S[INDEX4(0,0,1,2,numEq,numComp,4)]+= 4*A_0000*w6 - 4*A_0101*w3;
645 EM_S[INDEX4(0,0,1,3,numEq,numComp,4)]+=-4*A_0000*w6 - 8*A_0101*w3;
646 EM_S[INDEX4(0,0,2,0,numEq,numComp,4)]+=-4*A_0000*w6 - 8*A_0101*w3;
647 EM_S[INDEX4(0,0,2,1,numEq,numComp,4)]+= 4*A_0000*w6 - 4*A_0101*w3;
648 EM_S[INDEX4(0,0,2,2,numEq,numComp,4)]+=-8*A_0000*w6 + 8*A_0101*w3;
649 EM_S[INDEX4(0,0,2,3,numEq,numComp,4)]+= 8*A_0000*w6 + 4*A_0101*w3;
650 EM_S[INDEX4(0,0,3,0,numEq,numComp,4)]+= 4*A_0000*w6 - 4*A_0101*w3;
651 EM_S[INDEX4(0,0,3,1,numEq,numComp,4)]+=-4*A_0000*w6 - 8*A_0101*w3;
652 EM_S[INDEX4(0,0,3,2,numEq,numComp,4)]+= 8*A_0000*w6 + 4*A_0101*w3;
653 EM_S[INDEX4(0,0,3,3,numEq,numComp,4)]+=-8*A_0000*w6 + 8*A_0101*w3;
654 EM_S[INDEX4(0,1,0,0,numEq,numComp,4)]+= tmp01_1;
655 EM_S[INDEX4(0,1,0,1,numEq,numComp,4)]+= tmp01_0;
656 EM_S[INDEX4(0,1,0,2,numEq,numComp,4)]+= tmp01_3;
657 EM_S[INDEX4(0,1,0,3,numEq,numComp,4)]+= tmp01_2;
658 EM_S[INDEX4(0,1,1,0,numEq,numComp,4)]+= tmp01_3;
659 EM_S[INDEX4(0,1,1,1,numEq,numComp,4)]+= tmp01_2;
660 EM_S[INDEX4(0,1,1,2,numEq,numComp,4)]+= tmp01_1;
661 EM_S[INDEX4(0,1,1,3,numEq,numComp,4)]+= tmp01_0;
662 EM_S[INDEX4(0,1,2,0,numEq,numComp,4)]+= tmp01_0;
663 EM_S[INDEX4(0,1,2,1,numEq,numComp,4)]+= tmp01_1;
664 EM_S[INDEX4(0,1,2,2,numEq,numComp,4)]+= tmp01_2;
665 EM_S[INDEX4(0,1,2,3,numEq,numComp,4)]+= tmp01_3;
666 EM_S[INDEX4(0,1,3,0,numEq,numComp,4)]+= tmp01_2;
667 EM_S[INDEX4(0,1,3,1,numEq,numComp,4)]+= tmp01_3;
668 EM_S[INDEX4(0,1,3,2,numEq,numComp,4)]+= tmp01_0;
669 EM_S[INDEX4(0,1,3,3,numEq,numComp,4)]+= tmp01_1;
670 EM_S[INDEX4(1,0,0,0,numEq,numComp,4)]+= tmp10_1;
671 EM_S[INDEX4(1,0,0,1,numEq,numComp,4)]+= tmp10_0;
672 EM_S[INDEX4(1,0,0,2,numEq,numComp,4)]+= tmp10_3;
673 EM_S[INDEX4(1,0,0,3,numEq,numComp,4)]+= tmp10_2;
674 EM_S[INDEX4(1,0,1,0,numEq,numComp,4)]+= tmp10_3;
675 EM_S[INDEX4(1,0,1,1,numEq,numComp,4)]+= tmp10_2;
676 EM_S[INDEX4(1,0,1,2,numEq,numComp,4)]+= tmp10_1;
677 EM_S[INDEX4(1,0,1,3,numEq,numComp,4)]+= tmp10_0;
678 EM_S[INDEX4(1,0,2,0,numEq,numComp,4)]+= tmp10_0;
679 EM_S[INDEX4(1,0,2,1,numEq,numComp,4)]+= tmp10_1;
680 EM_S[INDEX4(1,0,2,2,numEq,numComp,4)]+= tmp10_2;
681 EM_S[INDEX4(1,0,2,3,numEq,numComp,4)]+= tmp10_3;
682 EM_S[INDEX4(1,0,3,0,numEq,numComp,4)]+= tmp10_2;
683 EM_S[INDEX4(1,0,3,1,numEq,numComp,4)]+= tmp10_3;
684 EM_S[INDEX4(1,0,3,2,numEq,numComp,4)]+= tmp10_0;
685 EM_S[INDEX4(1,0,3,3,numEq,numComp,4)]+= tmp10_1;
686 EM_S[INDEX4(1,1,0,0,numEq,numComp,4)]+=-8*A_1010*w6 + 8*A_1111*w3;
687 EM_S[INDEX4(1,1,0,1,numEq,numComp,4)]+= 8*A_1010*w6 + 4*A_1111*w3;
688 EM_S[INDEX4(1,1,0,2,numEq,numComp,4)]+=-4*A_1010*w6 - 8*A_1111*w3;
689 EM_S[INDEX4(1,1,0,3,numEq,numComp,4)]+= 4*A_1010*w6 - 4*A_1111*w3;
690 EM_S[INDEX4(1,1,1,0,numEq,numComp,4)]+= 8*A_1010*w6 + 4*A_1111*w3;
691 EM_S[INDEX4(1,1,1,1,numEq,numComp,4)]+=-8*A_1010*w6 + 8*A_1111*w3;
692 EM_S[INDEX4(1,1,1,2,numEq,numComp,4)]+= 4*A_1010*w6 - 4*A_1111*w3;
693 EM_S[INDEX4(1,1,1,3,numEq,numComp,4)]+=-4*A_1010*w6 - 8*A_1111*w3;
694 EM_S[INDEX4(1,1,2,0,numEq,numComp,4)]+=-4*A_1010*w6 - 8*A_1111*w3;
695 EM_S[INDEX4(1,1,2,1,numEq,numComp,4)]+= 4*A_1010*w6 - 4*A_1111*w3;
696 EM_S[INDEX4(1,1,2,2,numEq,numComp,4)]+=-8*A_1010*w6 + 8*A_1111*w3;
697 EM_S[INDEX4(1,1,2,3,numEq,numComp,4)]+= 8*A_1010*w6 + 4*A_1111*w3;
698 EM_S[INDEX4(1,1,3,0,numEq,numComp,4)]+= 4*A_1010*w6 - 4*A_1111*w3;
699 EM_S[INDEX4(1,1,3,1,numEq,numComp,4)]+=-4*A_1010*w6 - 8*A_1111*w3;
700 EM_S[INDEX4(1,1,3,2,numEq,numComp,4)]+= 8*A_1010*w6 + 4*A_1111*w3;
701 EM_S[INDEX4(1,1,3,3,numEq,numComp,4)]+=-8*A_1010*w6 + 8*A_1111*w3;
702 }
703 }
704 ///////////////
705 // process B //
706 ///////////////
707 if (!B.isEmpty()) {
708 addEM_S=true;
709 const double* B_p=B.getSampleDataRO(e);
710 if (B.actsExpanded()) {
711 for (index_t k=0; k<numEq; k++) {
712 for (index_t m=0; m<numComp; m++) {
713 const double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)];
714 const double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)];
715 const double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)];
716 const double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)];
717 const double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)];
718 const double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)];
719 const double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)];
720 const double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)];
721 const double tmp0 = w11*(B_1_0 + B_1_1);
722 const double tmp1 = w14*(B_1_2 + B_1_3);
723 const double tmp2 = w15*(-B_0_1 - B_0_3);
724 const double tmp3 = w10*(-B_0_0 - B_0_2);
725 const double tmp4 = w11*(B_1_2 + B_1_3);
726 const double tmp5 = w14*(B_1_0 + B_1_1);
727 const double tmp6 = w11*(-B_1_2 - B_1_3);
728 const double tmp7 = w14*(-B_1_0 - B_1_1);
729 const double tmp8 = w11*(-B_1_0 - B_1_1);
730 const double tmp9 = w14*(-B_1_2 - B_1_3);
731 const double tmp10 = w10*(-B_0_1 - B_0_3);
732 const double tmp11 = w15*(-B_0_0 - B_0_2);
733 const double tmp12 = w15*(B_0_0 + B_0_2);
734 const double tmp13 = w10*(B_0_1 + B_0_3);
735 const double tmp14 = w10*(B_0_0 + B_0_2);
736 const double tmp15 = w15*(B_0_1 + B_0_3);
737 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=B_0_0*w12 + B_0_1*w10 + B_0_2*w15 + B_0_3*w13 + B_1_0*w16 + B_1_1*w14 + B_1_2*w11 + B_1_3*w17;
738 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=B_0_0*w10 + B_0_1*w12 + B_0_2*w13 + B_0_3*w15 + tmp0 + tmp1;
739 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=B_1_0*w11 + B_1_1*w17 + B_1_2*w16 + B_1_3*w14 + tmp14 + tmp15;
740 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
741 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-B_0_0*w12 - B_0_1*w10 - B_0_2*w15 - B_0_3*w13 + tmp0 + tmp1;
742 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-B_0_0*w10 - B_0_1*w12 - B_0_2*w13 - B_0_3*w15 + B_1_0*w14 + B_1_1*w16 + B_1_2*w17 + B_1_3*w11;
743 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
744 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=B_1_0*w17 + B_1_1*w11 + B_1_2*w14 + B_1_3*w16 + tmp10 + tmp11;
745 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=-B_1_0*w16 - B_1_1*w14 - B_1_2*w11 - B_1_3*w17 + tmp14 + tmp15;
746 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
747 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=B_0_0*w15 + B_0_1*w13 + B_0_2*w12 + B_0_3*w10 - B_1_0*w11 - B_1_1*w17 - B_1_2*w16 - B_1_3*w14;
748 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=B_0_0*w13 + B_0_1*w15 + B_0_2*w10 + B_0_3*w12 + tmp6 + tmp7;
749 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
750 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=-B_1_0*w14 - B_1_1*w16 - B_1_2*w17 - B_1_3*w11 + tmp10 + tmp11;
751 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-B_0_0*w15 - B_0_1*w13 - B_0_2*w12 - B_0_3*w10 + tmp6 + tmp7;
752 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-B_0_0*w13 - B_0_1*w15 - B_0_2*w10 - B_0_3*w12 - B_1_0*w17 - B_1_1*w11 - B_1_2*w14 - B_1_3*w16;
753 }
754 }
755 } else { // constant data
756 for (index_t k=0; k<numEq; k++) {
757 for (index_t m=0; m<numComp; m++) {
758 const double wB0 = B_p[INDEX3(k,0,m,numEq,2)]*w18;
759 const double wB1 = B_p[INDEX3(k,1,m,numEq,2)]*w19;
760 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wB0 + 2*wB1;
761 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+= 2*wB0 + wB1;
762 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wB0 + 2*wB1;
763 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= wB0 + wB1;
764 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=-2*wB0 + wB1;
765 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wB0 + 2*wB1;
766 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= -wB0 + wB1;
767 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wB0 + 2*wB1;
768 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wB0 - 2*wB1;
769 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= wB0 - wB1;
770 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wB0 - 2*wB1;
771 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+= 2*wB0 - wB1;
772 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= -wB0 - wB1;
773 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wB0 - 2*wB1;
774 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=-2*wB0 - wB1;
775 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wB0 - 2*wB1;
776 }
777 }
778 }
779 }
780 ///////////////
781 // process C //
782 ///////////////
783 if (!C.isEmpty()) {
784 addEM_S=true;
785 const double* C_p=C.getSampleDataRO(e);
786 if (C.actsExpanded()) {
787 for (index_t k=0; k<numEq; k++) {
788 for (index_t m=0; m<numComp; m++) {
789 const double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)];
790 const double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)];
791 const double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)];
792 const double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)];
793 const double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)];
794 const double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)];
795 const double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)];
796 const double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)];
797 const double tmp0 = w11*(C_1_0 + C_1_1);
798 const double tmp1 = w14*(C_1_2 + C_1_3);
799 const double tmp2 = w15*(C_0_0 + C_0_2);
800 const double tmp3 = w10*(C_0_1 + C_0_3);
801 const double tmp4 = w11*(-C_1_0 - C_1_1);
802 const double tmp5 = w14*(-C_1_2 - C_1_3);
803 const double tmp6 = w11*(-C_1_2 - C_1_3);
804 const double tmp7 = w14*(-C_1_0 - C_1_1);
805 const double tmp8 = w11*(C_1_2 + C_1_3);
806 const double tmp9 = w14*(C_1_0 + C_1_1);
807 const double tmp10 = w10*(-C_0_1 - C_0_3);
808 const double tmp11 = w15*(-C_0_0 - C_0_2);
809 const double tmp12 = w15*(-C_0_1 - C_0_3);
810 const double tmp13 = w10*(-C_0_0 - C_0_2);
811 const double tmp14 = w10*(C_0_0 + C_0_2);
812 const double tmp15 = w15*(C_0_1 + C_0_3);
813 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=C_0_0*w12 + C_0_1*w10 + C_0_2*w15 + C_0_3*w13 + C_1_0*w16 + C_1_1*w14 + C_1_2*w11 + C_1_3*w17;
814 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-C_0_0*w12 - C_0_1*w10 - C_0_2*w15 - C_0_3*w13 + tmp0 + tmp1;
815 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=-C_1_0*w16 - C_1_1*w14 - C_1_2*w11 - C_1_3*w17 + tmp14 + tmp15;
816 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp12 + tmp13 + tmp4 + tmp5;
817 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=C_0_0*w10 + C_0_1*w12 + C_0_2*w13 + C_0_3*w15 + tmp0 + tmp1;
818 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-C_0_0*w10 - C_0_1*w12 - C_0_2*w13 - C_0_3*w15 + C_1_0*w14 + C_1_1*w16 + C_1_2*w17 + C_1_3*w11;
819 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2 + tmp3 + tmp4 + tmp5;
820 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=-C_1_0*w14 - C_1_1*w16 - C_1_2*w17 - C_1_3*w11 + tmp10 + tmp11;
821 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=C_1_0*w11 + C_1_1*w17 + C_1_2*w16 + C_1_3*w14 + tmp14 + tmp15;
822 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp12 + tmp13 + tmp8 + tmp9;
823 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=C_0_0*w15 + C_0_1*w13 + C_0_2*w12 + C_0_3*w10 - C_1_0*w11 - C_1_1*w17 - C_1_2*w16 - C_1_3*w14;
824 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-C_0_0*w15 - C_0_1*w13 - C_0_2*w12 - C_0_3*w10 + tmp6 + tmp7;
825 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2 + tmp3 + tmp8 + tmp9;
826 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=C_1_0*w17 + C_1_1*w11 + C_1_2*w14 + C_1_3*w16 + tmp10 + tmp11;
827 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=C_0_0*w13 + C_0_1*w15 + C_0_2*w10 + C_0_3*w12 + tmp6 + tmp7;
828 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-C_0_0*w13 - C_0_1*w15 - C_0_2*w10 - C_0_3*w12 - C_1_0*w17 - C_1_1*w11 - C_1_2*w14 - C_1_3*w16;
829 }
830 }
831 } else { // constant data
832 for (index_t k=0; k<numEq; k++) {
833 for (index_t m=0; m<numComp; m++) {
834 const double wC0 = C_p[INDEX3(k,m,0,numEq,numComp)]*w18;
835 const double wC1 = C_p[INDEX3(k,m,1,numEq,numComp)]*w19;
836 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+= 2*wC0 + 2*wC1;
837 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=-2*wC0 + wC1;
838 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+= wC0 - 2*wC1;
839 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+= -wC0 - wC1;
840 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+= 2*wC0 + wC1;
841 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=-2*wC0 + 2*wC1;
842 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+= wC0 - wC1;
843 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+= -wC0 - 2*wC1;
844 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+= wC0 + 2*wC1;
845 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+= -wC0 + wC1;
846 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+= 2*wC0 - 2*wC1;
847 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=-2*wC0 - wC1;
848 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+= wC0 + wC1;
849 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+= -wC0 + 2*wC1;
850 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+= 2*wC0 - wC1;
851 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=-2*wC0 - 2*wC1;
852 }
853 }
854 }
855 }
856 ///////////////
857 // process D //
858 ///////////////
859 if (!D.isEmpty()) {
860 addEM_S=true;
861 const double* D_p=D.getSampleDataRO(e);
862 if (D.actsExpanded()) {
863 for (index_t k=0; k<numEq; k++) {
864 for (index_t m=0; m<numComp; m++) {
865 const double D_0 = D_p[INDEX3(k,m,0,numEq,numComp)];
866 const double D_1 = D_p[INDEX3(k,m,1,numEq,numComp)];
867 const double D_2 = D_p[INDEX3(k,m,2,numEq,numComp)];
868 const double D_3 = D_p[INDEX3(k,m,3,numEq,numComp)];
869 const double tmp0 = w21*(D_2 + D_3);
870 const double tmp1 = w20*(D_0 + D_1);
871 const double tmp2 = w22*(D_0 + D_1 + D_2 + D_3);
872 const double tmp3 = w21*(D_0 + D_1);
873 const double tmp4 = w20*(D_2 + D_3);
874 const double tmp5 = w22*(D_1 + D_2);
875 const double tmp6 = w21*(D_0 + D_2);
876 const double tmp7 = w20*(D_1 + D_3);
877 const double tmp8 = w21*(D_1 + D_3);
878 const double tmp9 = w20*(D_0 + D_2);
879 const double tmp10 = w22*(D_0 + D_3);
880 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=D_0*w23 + D_3*w24 + tmp5;
881 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0 + tmp1;
882 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp8 + tmp9;
883 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2;
884 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0 + tmp1;
885 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=D_1*w23 + D_2*w24 + tmp10;
886 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2;
887 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6 + tmp7;
888 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp8 + tmp9;
889 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2;
890 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=D_1*w24 + D_2*w23 + tmp10;
891 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3 + tmp4;
892 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2;
893 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp6 + tmp7;
894 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3 + tmp4;
895 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=D_0*w24 + D_3*w23 + tmp5;
896 }
897 }
898 } else { // constant data
899 for (index_t k=0; k<numEq; k++) {
900 for (index_t m=0; m<numComp; m++) {
901 const double D_0 = D_p[INDEX2(k, m, numEq)];
902 EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=16*D_0*w22;
903 EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=8*D_0*w22;
904 EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=8*D_0*w22;
905 EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=4*D_0*w22;
906 EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=8*D_0*w22;
907 EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=16*D_0*w22;
908 EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=4*D_0*w22;
909 EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=8*D_0*w22;
910 EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=8*D_0*w22;
911 EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=4*D_0*w22;
912 EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=16*D_0*w22;
913 EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=8*D_0*w22;
914 EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=4*D_0*w22;
915 EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=8*D_0*w22;
916 EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=8*D_0*w22;
917 EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=16*D_0*w22;
918 }
919 }
920 }
921 }
922 ///////////////
923 // process X //
924 ///////////////
925 if (!X.isEmpty()) {
926 addEM_F=true;
927 const double* X_p=X.getSampleDataRO(e);
928 if (X.actsExpanded()) {
929 for (index_t k=0; k<numEq; k++) {
930 const double X_0_0 = X_p[INDEX3(k,0,0,numEq,2)];
931 const double X_1_0 = X_p[INDEX3(k,1,0,numEq,2)];
932 const double X_0_1 = X_p[INDEX3(k,0,1,numEq,2)];
933 const double X_1_1 = X_p[INDEX3(k,1,1,numEq,2)];
934 const double X_0_2 = X_p[INDEX3(k,0,2,numEq,2)];
935 const double X_1_2 = X_p[INDEX3(k,1,2,numEq,2)];
936 const double X_0_3 = X_p[INDEX3(k,0,3,numEq,2)];
937 const double X_1_3 = X_p[INDEX3(k,1,3,numEq,2)];
938 const double tmp0 = 6*w15*(X_0_2 + X_0_3);
939 const double tmp1 = 6*w10*(X_0_0 + X_0_1);
940 const double tmp2 = 6*w11*(X_1_0 + X_1_2);
941 const double tmp3 = 6*w14*(X_1_1 + X_1_3);
942 const double tmp4 = 6*w11*(X_1_1 + X_1_3);
943 const double tmp5 = w25*(X_0_0 + X_0_1);
944 const double tmp6 = w26*(X_0_2 + X_0_3);
945 const double tmp7 = 6*w14*(X_1_0 + X_1_2);
946 const double tmp8 = w27*(X_1_0 + X_1_2);
947 const double tmp9 = w28*(X_1_1 + X_1_3);
948 const double tmp10 = w25*(-X_0_2 - X_0_3);
949 const double tmp11 = w26*(-X_0_0 - X_0_1);
950 const double tmp12 = w27*(X_1_1 + X_1_3);
951 const double tmp13 = w28*(X_1_0 + X_1_2);
952 const double tmp14 = w25*(X_0_2 + X_0_3);
953 const double tmp15 = w26*(X_0_0 + X_0_1);
954 EM_F[INDEX2(k,0,numEq)]+=tmp0 + tmp1 + tmp2 + tmp3;
955 EM_F[INDEX2(k,1,numEq)]+=tmp4 + tmp5 + tmp6 + tmp7;
956 EM_F[INDEX2(k,2,numEq)]+=tmp10 + tmp11 + tmp8 + tmp9;
957 EM_F[INDEX2(k,3,numEq)]+=tmp12 + tmp13 + tmp14 + tmp15;
958 }
959 } else { // constant data
960 for (index_t k=0; k<numEq; k++) {
961 const double wX0 = X_p[INDEX2(k, 0, numEq)]*w18;
962 const double wX1 = X_p[INDEX2(k, 1, numEq)]*w19;
963 EM_F[INDEX2(k,0,numEq)]+= 6*wX0 + 6*wX1;
964 EM_F[INDEX2(k,1,numEq)]+=-6*wX0 + 6*wX1;
965 EM_F[INDEX2(k,2,numEq)]+= 6*wX0 - 6*wX1;
966 EM_F[INDEX2(k,3,numEq)]+=-6*wX0 - 6*wX1;
967 }
968 }
969 }
970 ///////////////
971 // process Y //
972 ///////////////
973 if (!Y.isEmpty()) {
974 addEM_F=true;
975 const double* Y_p=Y.getSampleDataRO(e);
976 if (Y.actsExpanded()) {
977 for (index_t k=0; k<numEq; k++) {
978 const double Y_0 = Y_p[INDEX2(k, 0, numEq)];
979 const double Y_1 = Y_p[INDEX2(k, 1, numEq)];
980 const double Y_2 = Y_p[INDEX2(k, 2, numEq)];
981 const double Y_3 = Y_p[INDEX2(k, 3, numEq)];
982 const double tmp0 = 6*w22*(Y_1 + Y_2);
983 const double tmp1 = 6*w22*(Y_0 + Y_3);
984 EM_F[INDEX2(k,0,numEq)]+=6*Y_0*w20 + 6*Y_3*w21 + tmp0;
985 EM_F[INDEX2(k,1,numEq)]+=6*Y_1*w20 + 6*Y_2*w21 + tmp1;
986 EM_F[INDEX2(k,2,numEq)]+=6*Y_1*w21 + 6*Y_2*w20 + tmp1;
987 EM_F[INDEX2(k,3,numEq)]+=6*Y_0*w21 + 6*Y_3*w20 + tmp0;
988 }
989 } else { // constant data
990 for (index_t k=0; k<numEq; k++) {
991 EM_F[INDEX2(k,0,numEq)]+=36*Y_p[k]*w22;
992 EM_F[INDEX2(k,1,numEq)]+=36*Y_p[k]*w22;
993 EM_F[INDEX2(k,2,numEq)]+=36*Y_p[k]*w22;
994 EM_F[INDEX2(k,3,numEq)]+=36*Y_p[k]*w22;
995 }
996 }
997 }
998
999 // add to matrix (if addEM_S) and RHS (if addEM_F)
1000 const index_t firstNode=m_NN[0]*k1+k0;
1001 domain->addToMatrixAndRHS(mat, rhs, EM_S, EM_F, addEM_S,
1002 addEM_F, firstNode, numEq, numComp);
1003 } // end k0 loop
1004 } // end k1 loop
1005 } // end of colouring
1006 } // end of parallel region
1007 }
1008
1009 } // namespace ripley
1010

  ViewVC Help
Powered by ViewVC 1.1.26