13 |
|
|
14 |
#include <ripley/Rectangle.h> |
#include <ripley/Rectangle.h> |
15 |
extern "C" { |
extern "C" { |
16 |
#include "paso/SystemMatrixPattern.h" |
#include "paso/SystemMatrix.h" |
17 |
} |
} |
18 |
|
|
19 |
#if USE_SILO |
#if USE_SILO |
226 |
{ |
{ |
227 |
switch (fsType) { |
switch (fsType) { |
228 |
case Nodes: |
case Nodes: |
229 |
|
case ReducedNodes: //FIXME: reduced |
230 |
return &m_nodeId[0]; |
return &m_nodeId[0]; |
231 |
case Elements: |
case Elements: |
232 |
case ReducedElements: |
case ReducedElements: |
728 |
} |
} |
729 |
} |
} |
730 |
|
|
|
void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat, |
|
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y, |
|
|
const escript::Data& d, const escript::Data& y, |
|
|
const escript::Data& d_contact, const escript::Data& y_contact, |
|
|
const escript::Data& d_dirac, const escript::Data& y_dirac) const |
|
|
{ |
|
|
throw RipleyException("addPDEToSystem() not implemented"); |
|
|
} |
|
|
|
|
731 |
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
732 |
bool reducedColOrder) const |
bool reducedColOrder) const |
733 |
{ |
{ |
852 |
for (index_t i=0; i<numDOF; i++) { |
for (index_t i=0; i<numDOF; i++) { |
853 |
// always add the node itself |
// always add the node itself |
854 |
index.push_back(i); |
index.push_back(i); |
855 |
int num=insertNeighbours(index, i); |
const int num=insertNeighbours(index, i); |
856 |
ptr.push_back(ptr.back()+num+1); |
ptr.push_back(ptr.back()+num+1); |
857 |
} |
} |
858 |
M=N=ptr.size()-1; |
M=N=ptr.size()-1; |
1509 |
} |
} |
1510 |
} |
} |
1511 |
|
|
1512 |
|
//protected |
1513 |
|
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
1514 |
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
1515 |
|
const escript::Data& C, const escript::Data& D, |
1516 |
|
const escript::Data& X, const escript::Data& Y, |
1517 |
|
const escript::Data& d, const escript::Data& y) const |
1518 |
|
{ |
1519 |
|
const double h0 = m_l0/m_gNE0; |
1520 |
|
const double h1 = m_l1/m_gNE1; |
1521 |
|
/* GENERATOR SNIP_PDE_SINGLE_PRE TOP */ |
1522 |
|
const double w0 = -0.1555021169820365539*h1/h0; |
1523 |
|
const double w1 = 0.041666666666666666667; |
1524 |
|
const double w10 = -0.041666666666666666667*h0/h1; |
1525 |
|
const double w11 = 0.1555021169820365539*h1/h0; |
1526 |
|
const double w12 = 0.1555021169820365539*h0/h1; |
1527 |
|
const double w13 = 0.01116454968463011277*h0/h1; |
1528 |
|
const double w14 = 0.01116454968463011277*h1/h0; |
1529 |
|
const double w15 = 0.041666666666666666667*h1/h0; |
1530 |
|
const double w16 = -0.01116454968463011277*h0/h1; |
1531 |
|
const double w17 = -0.1555021169820365539*h0/h1; |
1532 |
|
const double w18 = -0.33333333333333333333*h1/h0; |
1533 |
|
const double w19 = 0.25000000000000000000; |
1534 |
|
const double w2 = -0.15550211698203655390; |
1535 |
|
const double w20 = -0.25000000000000000000; |
1536 |
|
const double w21 = 0.16666666666666666667*h0/h1; |
1537 |
|
const double w22 = -0.16666666666666666667*h1/h0; |
1538 |
|
const double w23 = -0.16666666666666666667*h0/h1; |
1539 |
|
const double w24 = 0.33333333333333333333*h1/h0; |
1540 |
|
const double w25 = 0.33333333333333333333*h0/h1; |
1541 |
|
const double w26 = 0.16666666666666666667*h1/h0; |
1542 |
|
const double w27 = -0.33333333333333333333*h0/h1; |
1543 |
|
const double w28 = -0.032861463941450536761*h1; |
1544 |
|
const double w29 = -0.032861463941450536761*h0; |
1545 |
|
const double w3 = 0.041666666666666666667*h0/h1; |
1546 |
|
const double w30 = -0.12264065304058601714*h1; |
1547 |
|
const double w31 = -0.0023593469594139828636*h1; |
1548 |
|
const double w32 = -0.008805202725216129906*h0; |
1549 |
|
const double w33 = -0.008805202725216129906*h1; |
1550 |
|
const double w34 = 0.032861463941450536761*h1; |
1551 |
|
const double w35 = 0.008805202725216129906*h1; |
1552 |
|
const double w36 = 0.008805202725216129906*h0; |
1553 |
|
const double w37 = 0.0023593469594139828636*h1; |
1554 |
|
const double w38 = 0.12264065304058601714*h1; |
1555 |
|
const double w39 = 0.032861463941450536761*h0; |
1556 |
|
const double w4 = 0.15550211698203655390; |
1557 |
|
const double w40 = -0.12264065304058601714*h0; |
1558 |
|
const double w41 = -0.0023593469594139828636*h0; |
1559 |
|
const double w42 = 0.0023593469594139828636*h0; |
1560 |
|
const double w43 = 0.12264065304058601714*h0; |
1561 |
|
const double w44 = -0.16666666666666666667*h1; |
1562 |
|
const double w45 = -0.083333333333333333333*h0; |
1563 |
|
const double w46 = 0.083333333333333333333*h1; |
1564 |
|
const double w47 = 0.16666666666666666667*h1; |
1565 |
|
const double w48 = 0.083333333333333333333*h0; |
1566 |
|
const double w49 = -0.16666666666666666667*h0; |
1567 |
|
const double w5 = -0.041666666666666666667; |
1568 |
|
const double w50 = 0.16666666666666666667*h0; |
1569 |
|
const double w51 = -0.083333333333333333333*h1; |
1570 |
|
const double w52 = 0.025917019497006092316*h0*h1; |
1571 |
|
const double w53 = 0.0018607582807716854616*h0*h1; |
1572 |
|
const double w54 = 0.0069444444444444444444*h0*h1; |
1573 |
|
const double w55 = 0.09672363354357992482*h0*h1; |
1574 |
|
const double w56 = 0.00049858867864229740201*h0*h1; |
1575 |
|
const double w57 = 0.055555555555555555556*h0*h1; |
1576 |
|
const double w58 = 0.027777777777777777778*h0*h1; |
1577 |
|
const double w59 = 0.11111111111111111111*h0*h1; |
1578 |
|
const double w6 = -0.01116454968463011277*h1/h0; |
1579 |
|
const double w60 = -0.19716878364870322056*h1; |
1580 |
|
const double w61 = -0.19716878364870322056*h0; |
1581 |
|
const double w62 = -0.052831216351296779436*h0; |
1582 |
|
const double w63 = -0.052831216351296779436*h1; |
1583 |
|
const double w64 = 0.19716878364870322056*h1; |
1584 |
|
const double w65 = 0.052831216351296779436*h1; |
1585 |
|
const double w66 = 0.19716878364870322056*h0; |
1586 |
|
const double w67 = 0.052831216351296779436*h0; |
1587 |
|
const double w68 = -0.5*h1; |
1588 |
|
const double w69 = -0.5*h0; |
1589 |
|
const double w7 = 0.011164549684630112770; |
1590 |
|
const double w70 = 0.5*h1; |
1591 |
|
const double w71 = 0.5*h0; |
1592 |
|
const double w72 = 0.1555021169820365539*h0*h1; |
1593 |
|
const double w73 = 0.041666666666666666667*h0*h1; |
1594 |
|
const double w74 = 0.01116454968463011277*h0*h1; |
1595 |
|
const double w75 = 0.25*h0*h1; |
1596 |
|
const double w8 = -0.011164549684630112770; |
1597 |
|
const double w9 = -0.041666666666666666667*h1/h0; |
1598 |
|
/* GENERATOR SNIP_PDE_SINGLE_PRE BOTTOM */ |
1599 |
|
|
1600 |
|
rhs.requireWrite(); |
1601 |
|
#pragma omp parallel |
1602 |
|
{ |
1603 |
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // coloring |
1604 |
|
#pragma omp for |
1605 |
|
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { |
1606 |
|
for (index_t k0=0; k0<m_NE0; ++k0) { |
1607 |
|
bool add_EM_S=false; |
1608 |
|
bool add_EM_F=false; |
1609 |
|
vector<double> EM_S(4*4, 0); |
1610 |
|
vector<double> EM_F(4, 0); |
1611 |
|
const index_t e = k0 + m_NE0*k1; |
1612 |
|
/* GENERATOR SNIP_PDE_SINGLE TOP */ |
1613 |
|
/////////////// |
1614 |
|
// process A // |
1615 |
|
/////////////// |
1616 |
|
if (!A.isEmpty()) { |
1617 |
|
add_EM_S=true; |
1618 |
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
1619 |
|
if (A.actsExpanded()) { |
1620 |
|
const register double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; |
1621 |
|
const register double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; |
1622 |
|
const register double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; |
1623 |
|
const register double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; |
1624 |
|
const register double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; |
1625 |
|
const register double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; |
1626 |
|
const register double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; |
1627 |
|
const register double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; |
1628 |
|
const register double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; |
1629 |
|
const register double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; |
1630 |
|
const register double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; |
1631 |
|
const register double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; |
1632 |
|
const register double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; |
1633 |
|
const register double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; |
1634 |
|
const register double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; |
1635 |
|
const register double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; |
1636 |
|
const register double tmp4_0 = A_10_1 + A_10_2; |
1637 |
|
const register double tmp12_0 = A_11_0 + A_11_2; |
1638 |
|
const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
1639 |
|
const register double tmp10_0 = A_01_3 + A_10_3; |
1640 |
|
const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
1641 |
|
const register double tmp0_0 = A_01_0 + A_01_3; |
1642 |
|
const register double tmp13_0 = A_01_2 + A_10_1; |
1643 |
|
const register double tmp3_0 = A_00_2 + A_00_3; |
1644 |
|
const register double tmp11_0 = A_11_1 + A_11_3; |
1645 |
|
const register double tmp18_0 = A_01_1 + A_10_1; |
1646 |
|
const register double tmp1_0 = A_00_0 + A_00_1; |
1647 |
|
const register double tmp15_0 = A_01_1 + A_10_2; |
1648 |
|
const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
1649 |
|
const register double tmp16_0 = A_10_0 + A_10_3; |
1650 |
|
const register double tmp6_0 = A_01_3 + A_10_0; |
1651 |
|
const register double tmp17_0 = A_01_1 + A_01_2; |
1652 |
|
const register double tmp9_0 = A_01_0 + A_10_0; |
1653 |
|
const register double tmp7_0 = A_01_0 + A_10_3; |
1654 |
|
const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
1655 |
|
const register double tmp19_0 = A_01_2 + A_10_2; |
1656 |
|
const register double tmp14_1 = A_10_0*w8; |
1657 |
|
const register double tmp23_1 = tmp3_0*w14; |
1658 |
|
const register double tmp35_1 = A_01_0*w8; |
1659 |
|
const register double tmp54_1 = tmp13_0*w8; |
1660 |
|
const register double tmp20_1 = tmp9_0*w4; |
1661 |
|
const register double tmp25_1 = tmp12_0*w12; |
1662 |
|
const register double tmp2_1 = A_01_1*w4; |
1663 |
|
const register double tmp44_1 = tmp7_0*w7; |
1664 |
|
const register double tmp26_1 = tmp10_0*w4; |
1665 |
|
const register double tmp52_1 = tmp18_0*w8; |
1666 |
|
const register double tmp48_1 = A_10_1*w7; |
1667 |
|
const register double tmp46_1 = A_01_3*w8; |
1668 |
|
const register double tmp50_1 = A_01_0*w2; |
1669 |
|
const register double tmp8_1 = tmp4_0*w5; |
1670 |
|
const register double tmp56_1 = tmp19_0*w8; |
1671 |
|
const register double tmp9_1 = tmp2_0*w10; |
1672 |
|
const register double tmp19_1 = A_10_3*w2; |
1673 |
|
const register double tmp47_1 = A_10_2*w4; |
1674 |
|
const register double tmp16_1 = tmp3_0*w0; |
1675 |
|
const register double tmp18_1 = tmp1_0*w6; |
1676 |
|
const register double tmp31_1 = tmp11_0*w12; |
1677 |
|
const register double tmp55_1 = tmp15_0*w2; |
1678 |
|
const register double tmp39_1 = A_10_2*w7; |
1679 |
|
const register double tmp11_1 = tmp6_0*w7; |
1680 |
|
const register double tmp40_1 = tmp11_0*w17; |
1681 |
|
const register double tmp34_1 = tmp15_0*w8; |
1682 |
|
const register double tmp33_1 = tmp14_0*w5; |
1683 |
|
const register double tmp24_1 = tmp11_0*w13; |
1684 |
|
const register double tmp3_1 = tmp1_0*w0; |
1685 |
|
const register double tmp5_1 = tmp2_0*w3; |
1686 |
|
const register double tmp43_1 = tmp17_0*w5; |
1687 |
|
const register double tmp15_1 = A_01_2*w4; |
1688 |
|
const register double tmp53_1 = tmp19_0*w2; |
1689 |
|
const register double tmp27_1 = tmp3_0*w11; |
1690 |
|
const register double tmp32_1 = tmp13_0*w2; |
1691 |
|
const register double tmp10_1 = tmp5_0*w9; |
1692 |
|
const register double tmp37_1 = A_10_1*w4; |
1693 |
|
const register double tmp38_1 = tmp5_0*w15; |
1694 |
|
const register double tmp17_1 = A_01_1*w7; |
1695 |
|
const register double tmp12_1 = tmp7_0*w4; |
1696 |
|
const register double tmp22_1 = tmp10_0*w7; |
1697 |
|
const register double tmp57_1 = tmp18_0*w2; |
1698 |
|
const register double tmp28_1 = tmp9_0*w7; |
1699 |
|
const register double tmp29_1 = tmp1_0*w14; |
1700 |
|
const register double tmp51_1 = tmp11_0*w16; |
1701 |
|
const register double tmp42_1 = tmp12_0*w16; |
1702 |
|
const register double tmp49_1 = tmp12_0*w17; |
1703 |
|
const register double tmp21_1 = tmp1_0*w11; |
1704 |
|
const register double tmp1_1 = tmp0_0*w1; |
1705 |
|
const register double tmp45_1 = tmp6_0*w4; |
1706 |
|
const register double tmp7_1 = A_10_0*w2; |
1707 |
|
const register double tmp6_1 = tmp3_0*w6; |
1708 |
|
const register double tmp13_1 = tmp8_0*w1; |
1709 |
|
const register double tmp36_1 = tmp16_0*w1; |
1710 |
|
const register double tmp41_1 = A_01_3*w2; |
1711 |
|
const register double tmp30_1 = tmp12_0*w13; |
1712 |
|
const register double tmp4_1 = A_01_2*w7; |
1713 |
|
const register double tmp0_1 = A_10_3*w8; |
1714 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
1715 |
|
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
1716 |
|
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
1717 |
|
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
1718 |
|
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1719 |
|
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
1720 |
|
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
1721 |
|
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
1722 |
|
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1723 |
|
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
1724 |
|
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
1725 |
|
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
1726 |
|
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
1727 |
|
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
1728 |
|
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
1729 |
|
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
1730 |
|
} else { /* constant data */ |
1731 |
|
const register double A_00 = A_p[INDEX2(0,0,2)]; |
1732 |
|
const register double A_01 = A_p[INDEX2(0,1,2)]; |
1733 |
|
const register double A_10 = A_p[INDEX2(1,0,2)]; |
1734 |
|
const register double A_11 = A_p[INDEX2(1,1,2)]; |
1735 |
|
const register double tmp0_0 = A_01 + A_10; |
1736 |
|
const register double tmp0_1 = A_00*w18; |
1737 |
|
const register double tmp10_1 = A_01*w20; |
1738 |
|
const register double tmp12_1 = A_00*w26; |
1739 |
|
const register double tmp4_1 = A_00*w22; |
1740 |
|
const register double tmp8_1 = A_00*w24; |
1741 |
|
const register double tmp13_1 = A_10*w19; |
1742 |
|
const register double tmp9_1 = tmp0_0*w20; |
1743 |
|
const register double tmp3_1 = A_11*w21; |
1744 |
|
const register double tmp11_1 = A_11*w27; |
1745 |
|
const register double tmp1_1 = A_01*w19; |
1746 |
|
const register double tmp6_1 = A_11*w23; |
1747 |
|
const register double tmp7_1 = A_11*w25; |
1748 |
|
const register double tmp2_1 = A_10*w20; |
1749 |
|
const register double tmp5_1 = tmp0_0*w19; |
1750 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1751 |
|
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1752 |
|
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1753 |
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1754 |
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1755 |
|
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1756 |
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1757 |
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1758 |
|
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1759 |
|
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1760 |
|
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1761 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1762 |
|
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1763 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1764 |
|
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1765 |
|
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1766 |
|
} |
1767 |
|
} |
1768 |
|
/////////////// |
1769 |
|
// process B // |
1770 |
|
/////////////// |
1771 |
|
if (!B.isEmpty()) { |
1772 |
|
add_EM_S=true; |
1773 |
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
1774 |
|
if (B.actsExpanded()) { |
1775 |
|
const register double B_0_0 = B_p[INDEX2(0,0,2)]; |
1776 |
|
const register double B_1_0 = B_p[INDEX2(1,0,2)]; |
1777 |
|
const register double B_0_1 = B_p[INDEX2(0,1,2)]; |
1778 |
|
const register double B_1_1 = B_p[INDEX2(1,1,2)]; |
1779 |
|
const register double B_0_2 = B_p[INDEX2(0,2,2)]; |
1780 |
|
const register double B_1_2 = B_p[INDEX2(1,2,2)]; |
1781 |
|
const register double B_0_3 = B_p[INDEX2(0,3,2)]; |
1782 |
|
const register double B_1_3 = B_p[INDEX2(1,3,2)]; |
1783 |
|
const register double tmp3_0 = B_0_0 + B_0_2; |
1784 |
|
const register double tmp1_0 = B_1_2 + B_1_3; |
1785 |
|
const register double tmp2_0 = B_0_1 + B_0_3; |
1786 |
|
const register double tmp0_0 = B_1_0 + B_1_1; |
1787 |
|
const register double tmp63_1 = B_1_1*w42; |
1788 |
|
const register double tmp79_1 = B_1_1*w40; |
1789 |
|
const register double tmp37_1 = tmp3_0*w35; |
1790 |
|
const register double tmp8_1 = tmp0_0*w32; |
1791 |
|
const register double tmp71_1 = B_0_1*w34; |
1792 |
|
const register double tmp19_1 = B_0_3*w31; |
1793 |
|
const register double tmp15_1 = B_0_3*w34; |
1794 |
|
const register double tmp9_1 = tmp3_0*w34; |
1795 |
|
const register double tmp35_1 = B_1_0*w36; |
1796 |
|
const register double tmp66_1 = B_0_3*w28; |
1797 |
|
const register double tmp28_1 = B_1_0*w42; |
1798 |
|
const register double tmp22_1 = B_1_0*w40; |
1799 |
|
const register double tmp16_1 = B_1_2*w29; |
1800 |
|
const register double tmp6_1 = tmp2_0*w35; |
1801 |
|
const register double tmp55_1 = B_1_3*w40; |
1802 |
|
const register double tmp50_1 = B_1_3*w42; |
1803 |
|
const register double tmp7_1 = tmp1_0*w29; |
1804 |
|
const register double tmp1_1 = tmp1_0*w32; |
1805 |
|
const register double tmp57_1 = B_0_3*w30; |
1806 |
|
const register double tmp18_1 = B_1_1*w32; |
1807 |
|
const register double tmp53_1 = B_1_0*w41; |
1808 |
|
const register double tmp61_1 = B_1_3*w36; |
1809 |
|
const register double tmp27_1 = B_0_3*w38; |
1810 |
|
const register double tmp64_1 = B_0_2*w30; |
1811 |
|
const register double tmp76_1 = B_0_1*w38; |
1812 |
|
const register double tmp39_1 = tmp2_0*w34; |
1813 |
|
const register double tmp62_1 = B_0_1*w31; |
1814 |
|
const register double tmp56_1 = B_0_0*w31; |
1815 |
|
const register double tmp49_1 = B_1_1*w36; |
1816 |
|
const register double tmp2_1 = B_0_2*w31; |
1817 |
|
const register double tmp23_1 = B_0_2*w33; |
1818 |
|
const register double tmp38_1 = B_1_1*w43; |
1819 |
|
const register double tmp74_1 = B_1_2*w41; |
1820 |
|
const register double tmp43_1 = B_1_1*w41; |
1821 |
|
const register double tmp58_1 = B_0_2*w28; |
1822 |
|
const register double tmp67_1 = B_0_0*w33; |
1823 |
|
const register double tmp33_1 = tmp0_0*w39; |
1824 |
|
const register double tmp4_1 = B_0_0*w28; |
1825 |
|
const register double tmp20_1 = B_0_0*w30; |
1826 |
|
const register double tmp13_1 = B_0_2*w38; |
1827 |
|
const register double tmp65_1 = B_1_2*w43; |
1828 |
|
const register double tmp0_1 = tmp0_0*w29; |
1829 |
|
const register double tmp41_1 = tmp3_0*w33; |
1830 |
|
const register double tmp73_1 = B_0_2*w37; |
1831 |
|
const register double tmp69_1 = B_0_0*w38; |
1832 |
|
const register double tmp48_1 = B_1_2*w39; |
1833 |
|
const register double tmp59_1 = B_0_1*w33; |
1834 |
|
const register double tmp17_1 = B_1_3*w41; |
1835 |
|
const register double tmp5_1 = B_0_3*w33; |
1836 |
|
const register double tmp3_1 = B_0_1*w30; |
1837 |
|
const register double tmp21_1 = B_0_1*w28; |
1838 |
|
const register double tmp42_1 = B_1_0*w29; |
1839 |
|
const register double tmp54_1 = B_1_2*w32; |
1840 |
|
const register double tmp60_1 = B_1_0*w39; |
1841 |
|
const register double tmp32_1 = tmp1_0*w36; |
1842 |
|
const register double tmp10_1 = B_0_1*w37; |
1843 |
|
const register double tmp14_1 = B_0_0*w35; |
1844 |
|
const register double tmp29_1 = B_0_1*w35; |
1845 |
|
const register double tmp26_1 = B_1_2*w36; |
1846 |
|
const register double tmp30_1 = B_1_3*w43; |
1847 |
|
const register double tmp70_1 = B_0_2*w35; |
1848 |
|
const register double tmp34_1 = B_1_3*w39; |
1849 |
|
const register double tmp51_1 = B_1_0*w43; |
1850 |
|
const register double tmp31_1 = B_0_2*w34; |
1851 |
|
const register double tmp45_1 = tmp3_0*w28; |
1852 |
|
const register double tmp11_1 = tmp1_0*w39; |
1853 |
|
const register double tmp52_1 = B_1_1*w29; |
1854 |
|
const register double tmp44_1 = B_1_3*w32; |
1855 |
|
const register double tmp25_1 = B_1_1*w39; |
1856 |
|
const register double tmp47_1 = tmp2_0*w33; |
1857 |
|
const register double tmp72_1 = B_1_3*w29; |
1858 |
|
const register double tmp40_1 = tmp2_0*w28; |
1859 |
|
const register double tmp46_1 = B_1_2*w40; |
1860 |
|
const register double tmp36_1 = B_1_2*w42; |
1861 |
|
const register double tmp24_1 = B_0_0*w37; |
1862 |
|
const register double tmp77_1 = B_0_3*w35; |
1863 |
|
const register double tmp68_1 = B_0_3*w37; |
1864 |
|
const register double tmp78_1 = B_0_0*w34; |
1865 |
|
const register double tmp12_1 = tmp0_0*w36; |
1866 |
|
const register double tmp75_1 = B_1_0*w32; |
1867 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1868 |
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
1869 |
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1870 |
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1871 |
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1872 |
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
1873 |
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
1874 |
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1875 |
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1876 |
|
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1877 |
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1878 |
|
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1879 |
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1880 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1881 |
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
1882 |
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
1883 |
|
} else { /* constant data */ |
1884 |
|
const register double B_0 = B_p[0]; |
1885 |
|
const register double B_1 = B_p[1]; |
1886 |
|
const register double tmp6_1 = B_1*w50; |
1887 |
|
const register double tmp1_1 = B_1*w45; |
1888 |
|
const register double tmp5_1 = B_1*w49; |
1889 |
|
const register double tmp4_1 = B_1*w48; |
1890 |
|
const register double tmp0_1 = B_0*w44; |
1891 |
|
const register double tmp2_1 = B_0*w46; |
1892 |
|
const register double tmp7_1 = B_0*w51; |
1893 |
|
const register double tmp3_1 = B_0*w47; |
1894 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1895 |
|
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
1896 |
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
1897 |
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; |
1898 |
|
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; |
1899 |
|
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; |
1900 |
|
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; |
1901 |
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; |
1902 |
|
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; |
1903 |
|
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; |
1904 |
|
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; |
1905 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; |
1906 |
|
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; |
1907 |
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
1908 |
|
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; |
1909 |
|
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; |
1910 |
|
} |
1911 |
|
} |
1912 |
|
/////////////// |
1913 |
|
// process C // |
1914 |
|
/////////////// |
1915 |
|
if (!C.isEmpty()) { |
1916 |
|
add_EM_S=true; |
1917 |
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
1918 |
|
if (C.actsExpanded()) { |
1919 |
|
const register double C_0_0 = C_p[INDEX2(0,0,2)]; |
1920 |
|
const register double C_1_0 = C_p[INDEX2(1,0,2)]; |
1921 |
|
const register double C_0_1 = C_p[INDEX2(0,1,2)]; |
1922 |
|
const register double C_1_1 = C_p[INDEX2(1,1,2)]; |
1923 |
|
const register double C_0_2 = C_p[INDEX2(0,2,2)]; |
1924 |
|
const register double C_1_2 = C_p[INDEX2(1,2,2)]; |
1925 |
|
const register double C_0_3 = C_p[INDEX2(0,3,2)]; |
1926 |
|
const register double C_1_3 = C_p[INDEX2(1,3,2)]; |
1927 |
|
const register double tmp2_0 = C_0_1 + C_0_3; |
1928 |
|
const register double tmp1_0 = C_1_2 + C_1_3; |
1929 |
|
const register double tmp3_0 = C_0_0 + C_0_2; |
1930 |
|
const register double tmp0_0 = C_1_0 + C_1_1; |
1931 |
|
const register double tmp64_1 = C_0_2*w30; |
1932 |
|
const register double tmp14_1 = C_0_2*w28; |
1933 |
|
const register double tmp19_1 = C_0_3*w31; |
1934 |
|
const register double tmp22_1 = C_1_0*w40; |
1935 |
|
const register double tmp37_1 = tmp3_0*w35; |
1936 |
|
const register double tmp29_1 = C_0_1*w35; |
1937 |
|
const register double tmp73_1 = C_0_2*w37; |
1938 |
|
const register double tmp74_1 = C_1_2*w41; |
1939 |
|
const register double tmp52_1 = C_1_3*w39; |
1940 |
|
const register double tmp25_1 = C_1_1*w39; |
1941 |
|
const register double tmp62_1 = C_0_1*w31; |
1942 |
|
const register double tmp79_1 = C_1_1*w40; |
1943 |
|
const register double tmp43_1 = C_1_1*w36; |
1944 |
|
const register double tmp27_1 = C_0_3*w38; |
1945 |
|
const register double tmp28_1 = C_1_0*w42; |
1946 |
|
const register double tmp63_1 = C_1_1*w42; |
1947 |
|
const register double tmp59_1 = C_0_3*w34; |
1948 |
|
const register double tmp72_1 = C_1_3*w29; |
1949 |
|
const register double tmp40_1 = tmp2_0*w35; |
1950 |
|
const register double tmp13_1 = C_0_3*w30; |
1951 |
|
const register double tmp51_1 = C_1_2*w40; |
1952 |
|
const register double tmp54_1 = C_1_2*w42; |
1953 |
|
const register double tmp12_1 = C_0_0*w31; |
1954 |
|
const register double tmp2_1 = tmp1_0*w32; |
1955 |
|
const register double tmp68_1 = C_0_2*w31; |
1956 |
|
const register double tmp75_1 = C_1_0*w32; |
1957 |
|
const register double tmp49_1 = C_1_1*w41; |
1958 |
|
const register double tmp4_1 = C_0_2*w35; |
1959 |
|
const register double tmp66_1 = C_0_3*w28; |
1960 |
|
const register double tmp56_1 = C_0_1*w37; |
1961 |
|
const register double tmp5_1 = C_0_1*w34; |
1962 |
|
const register double tmp38_1 = tmp2_0*w34; |
1963 |
|
const register double tmp76_1 = C_0_1*w38; |
1964 |
|
const register double tmp21_1 = C_0_1*w28; |
1965 |
|
const register double tmp69_1 = C_0_1*w30; |
1966 |
|
const register double tmp53_1 = C_1_0*w36; |
1967 |
|
const register double tmp42_1 = C_1_2*w39; |
1968 |
|
const register double tmp32_1 = tmp1_0*w29; |
1969 |
|
const register double tmp45_1 = C_1_0*w43; |
1970 |
|
const register double tmp33_1 = tmp0_0*w32; |
1971 |
|
const register double tmp35_1 = C_1_0*w41; |
1972 |
|
const register double tmp26_1 = C_1_2*w36; |
1973 |
|
const register double tmp67_1 = C_0_0*w33; |
1974 |
|
const register double tmp31_1 = C_0_2*w34; |
1975 |
|
const register double tmp20_1 = C_0_0*w30; |
1976 |
|
const register double tmp70_1 = C_0_0*w28; |
1977 |
|
const register double tmp8_1 = tmp0_0*w39; |
1978 |
|
const register double tmp30_1 = C_1_3*w43; |
1979 |
|
const register double tmp0_1 = tmp0_0*w29; |
1980 |
|
const register double tmp17_1 = C_1_3*w41; |
1981 |
|
const register double tmp58_1 = C_0_0*w35; |
1982 |
|
const register double tmp9_1 = tmp3_0*w33; |
1983 |
|
const register double tmp61_1 = C_1_3*w36; |
1984 |
|
const register double tmp41_1 = tmp3_0*w34; |
1985 |
|
const register double tmp50_1 = C_1_3*w32; |
1986 |
|
const register double tmp18_1 = C_1_1*w32; |
1987 |
|
const register double tmp6_1 = tmp1_0*w36; |
1988 |
|
const register double tmp3_1 = C_0_0*w38; |
1989 |
|
const register double tmp34_1 = C_1_1*w29; |
1990 |
|
const register double tmp77_1 = C_0_3*w35; |
1991 |
|
const register double tmp65_1 = C_1_2*w43; |
1992 |
|
const register double tmp71_1 = C_0_3*w33; |
1993 |
|
const register double tmp55_1 = C_1_1*w43; |
1994 |
|
const register double tmp46_1 = tmp3_0*w28; |
1995 |
|
const register double tmp24_1 = C_0_0*w37; |
1996 |
|
const register double tmp10_1 = tmp1_0*w39; |
1997 |
|
const register double tmp48_1 = C_1_0*w29; |
1998 |
|
const register double tmp15_1 = C_0_1*w33; |
1999 |
|
const register double tmp36_1 = C_1_2*w32; |
2000 |
|
const register double tmp60_1 = C_1_0*w39; |
2001 |
|
const register double tmp47_1 = tmp2_0*w33; |
2002 |
|
const register double tmp16_1 = C_1_2*w29; |
2003 |
|
const register double tmp1_1 = C_0_3*w37; |
2004 |
|
const register double tmp7_1 = tmp2_0*w28; |
2005 |
|
const register double tmp39_1 = C_1_3*w40; |
2006 |
|
const register double tmp44_1 = C_1_3*w42; |
2007 |
|
const register double tmp57_1 = C_0_2*w38; |
2008 |
|
const register double tmp78_1 = C_0_0*w34; |
2009 |
|
const register double tmp11_1 = tmp0_0*w36; |
2010 |
|
const register double tmp23_1 = C_0_2*w33; |
2011 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
2012 |
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
2013 |
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2014 |
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
2015 |
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
2016 |
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
2017 |
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
2018 |
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
2019 |
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
2020 |
|
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
2021 |
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
2022 |
|
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
2023 |
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
2024 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
2025 |
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
2026 |
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
2027 |
|
} else { /* constant data */ |
2028 |
|
const register double C_0 = C_p[0]; |
2029 |
|
const register double C_1 = C_p[1]; |
2030 |
|
const register double tmp1_1 = C_1*w45; |
2031 |
|
const register double tmp3_1 = C_0*w51; |
2032 |
|
const register double tmp4_1 = C_0*w44; |
2033 |
|
const register double tmp7_1 = C_0*w46; |
2034 |
|
const register double tmp5_1 = C_1*w49; |
2035 |
|
const register double tmp2_1 = C_1*w48; |
2036 |
|
const register double tmp0_1 = C_0*w47; |
2037 |
|
const register double tmp6_1 = C_1*w50; |
2038 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
2039 |
|
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
2040 |
|
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; |
2041 |
|
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; |
2042 |
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; |
2043 |
|
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
2044 |
|
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; |
2045 |
|
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; |
2046 |
|
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; |
2047 |
|
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; |
2048 |
|
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; |
2049 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; |
2050 |
|
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; |
2051 |
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; |
2052 |
|
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; |
2053 |
|
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; |
2054 |
|
} |
2055 |
|
} |
2056 |
|
/////////////// |
2057 |
|
// process D // |
2058 |
|
/////////////// |
2059 |
|
if (!D.isEmpty()) { |
2060 |
|
add_EM_S=true; |
2061 |
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
2062 |
|
if (D.actsExpanded()) { |
2063 |
|
const register double D_0 = D_p[0]; |
2064 |
|
const register double D_1 = D_p[1]; |
2065 |
|
const register double D_2 = D_p[2]; |
2066 |
|
const register double D_3 = D_p[3]; |
2067 |
|
const register double tmp4_0 = D_1 + D_3; |
2068 |
|
const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
2069 |
|
const register double tmp5_0 = D_0 + D_2; |
2070 |
|
const register double tmp0_0 = D_0 + D_1; |
2071 |
|
const register double tmp6_0 = D_0 + D_3; |
2072 |
|
const register double tmp1_0 = D_2 + D_3; |
2073 |
|
const register double tmp3_0 = D_1 + D_2; |
2074 |
|
const register double tmp16_1 = D_1*w56; |
2075 |
|
const register double tmp14_1 = tmp6_0*w54; |
2076 |
|
const register double tmp8_1 = D_3*w55; |
2077 |
|
const register double tmp2_1 = tmp2_0*w54; |
2078 |
|
const register double tmp12_1 = tmp5_0*w52; |
2079 |
|
const register double tmp4_1 = tmp0_0*w53; |
2080 |
|
const register double tmp3_1 = tmp1_0*w52; |
2081 |
|
const register double tmp13_1 = tmp4_0*w53; |
2082 |
|
const register double tmp10_1 = tmp4_0*w52; |
2083 |
|
const register double tmp1_1 = tmp1_0*w53; |
2084 |
|
const register double tmp7_1 = D_3*w56; |
2085 |
|
const register double tmp0_1 = tmp0_0*w52; |
2086 |
|
const register double tmp11_1 = tmp5_0*w53; |
2087 |
|
const register double tmp9_1 = D_0*w56; |
2088 |
|
const register double tmp5_1 = tmp3_0*w54; |
2089 |
|
const register double tmp18_1 = D_2*w56; |
2090 |
|
const register double tmp17_1 = D_1*w55; |
2091 |
|
const register double tmp6_1 = D_0*w55; |
2092 |
|
const register double tmp15_1 = D_2*w55; |
2093 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
2094 |
|
EM_S[INDEX2(1,2,4)]+=tmp2_1; |
2095 |
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
2096 |
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
2097 |
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
2098 |
|
EM_S[INDEX2(3,0,4)]+=tmp2_1; |
2099 |
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; |
2100 |
|
EM_S[INDEX2(2,1,4)]+=tmp2_1; |
2101 |
|
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; |
2102 |
|
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; |
2103 |
|
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; |
2104 |
|
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; |
2105 |
|
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
2106 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; |
2107 |
|
EM_S[INDEX2(0,3,4)]+=tmp2_1; |
2108 |
|
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
2109 |
|
} else { /* constant data */ |
2110 |
|
const register double D_0 = D_p[0]; |
2111 |
|
const register double tmp2_1 = D_0*w59; |
2112 |
|
const register double tmp1_1 = D_0*w58; |
2113 |
|
const register double tmp0_1 = D_0*w57; |
2114 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
2115 |
|
EM_S[INDEX2(1,2,4)]+=tmp1_1; |
2116 |
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
2117 |
|
EM_S[INDEX2(0,0,4)]+=tmp2_1; |
2118 |
|
EM_S[INDEX2(3,3,4)]+=tmp2_1; |
2119 |
|
EM_S[INDEX2(3,0,4)]+=tmp1_1; |
2120 |
|
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
2121 |
|
EM_S[INDEX2(2,1,4)]+=tmp1_1; |
2122 |
|
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
2123 |
|
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
2124 |
|
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
2125 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
2126 |
|
EM_S[INDEX2(2,2,4)]+=tmp2_1; |
2127 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
2128 |
|
EM_S[INDEX2(0,3,4)]+=tmp1_1; |
2129 |
|
EM_S[INDEX2(1,1,4)]+=tmp2_1; |
2130 |
|
} |
2131 |
|
} |
2132 |
|
/////////////// |
2133 |
|
// process X // |
2134 |
|
/////////////// |
2135 |
|
if (!X.isEmpty()) { |
2136 |
|
add_EM_F=true; |
2137 |
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
2138 |
|
if (X.actsExpanded()) { |
2139 |
|
const register double X_0_0 = X_p[INDEX2(0,0,2)]; |
2140 |
|
const register double X_1_0 = X_p[INDEX2(1,0,2)]; |
2141 |
|
const register double X_0_1 = X_p[INDEX2(0,1,2)]; |
2142 |
|
const register double X_1_1 = X_p[INDEX2(1,1,2)]; |
2143 |
|
const register double X_0_2 = X_p[INDEX2(0,2,2)]; |
2144 |
|
const register double X_1_2 = X_p[INDEX2(1,2,2)]; |
2145 |
|
const register double X_0_3 = X_p[INDEX2(0,3,2)]; |
2146 |
|
const register double X_1_3 = X_p[INDEX2(1,3,2)]; |
2147 |
|
const register double tmp1_0 = X_1_1 + X_1_3; |
2148 |
|
const register double tmp3_0 = X_0_0 + X_0_1; |
2149 |
|
const register double tmp2_0 = X_1_0 + X_1_2; |
2150 |
|
const register double tmp0_0 = X_0_2 + X_0_3; |
2151 |
|
const register double tmp8_1 = tmp2_0*w66; |
2152 |
|
const register double tmp5_1 = tmp3_0*w64; |
2153 |
|
const register double tmp14_1 = tmp0_0*w64; |
2154 |
|
const register double tmp3_1 = tmp3_0*w60; |
2155 |
|
const register double tmp9_1 = tmp3_0*w63; |
2156 |
|
const register double tmp13_1 = tmp3_0*w65; |
2157 |
|
const register double tmp12_1 = tmp1_0*w66; |
2158 |
|
const register double tmp10_1 = tmp0_0*w60; |
2159 |
|
const register double tmp2_1 = tmp2_0*w61; |
2160 |
|
const register double tmp6_1 = tmp2_0*w62; |
2161 |
|
const register double tmp4_1 = tmp0_0*w65; |
2162 |
|
const register double tmp11_1 = tmp1_0*w67; |
2163 |
|
const register double tmp1_1 = tmp1_0*w62; |
2164 |
|
const register double tmp7_1 = tmp1_0*w61; |
2165 |
|
const register double tmp0_1 = tmp0_0*w63; |
2166 |
|
const register double tmp15_1 = tmp2_0*w67; |
2167 |
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2168 |
|
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
2169 |
|
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
2170 |
|
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2171 |
|
} else { /* constant data */ |
2172 |
|
const register double X_0 = X_p[0]; |
2173 |
|
const register double X_1 = X_p[1]; |
2174 |
|
const register double tmp3_1 = X_1*w71; |
2175 |
|
const register double tmp2_1 = X_0*w70; |
2176 |
|
const register double tmp1_1 = X_0*w68; |
2177 |
|
const register double tmp0_1 = X_1*w69; |
2178 |
|
EM_F[0]+=tmp0_1 + tmp1_1; |
2179 |
|
EM_F[1]+=tmp0_1 + tmp2_1; |
2180 |
|
EM_F[2]+=tmp1_1 + tmp3_1; |
2181 |
|
EM_F[3]+=tmp2_1 + tmp3_1; |
2182 |
|
} |
2183 |
|
} |
2184 |
|
/////////////// |
2185 |
|
// process Y // |
2186 |
|
/////////////// |
2187 |
|
if (!Y.isEmpty()) { |
2188 |
|
add_EM_F=true; |
2189 |
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
2190 |
|
if (Y.actsExpanded()) { |
2191 |
|
const register double Y_0 = Y_p[0]; |
2192 |
|
const register double Y_1 = Y_p[1]; |
2193 |
|
const register double Y_2 = Y_p[2]; |
2194 |
|
const register double Y_3 = Y_p[3]; |
2195 |
|
const register double tmp0_0 = Y_1 + Y_2; |
2196 |
|
const register double tmp1_0 = Y_0 + Y_3; |
2197 |
|
const register double tmp9_1 = Y_0*w74; |
2198 |
|
const register double tmp4_1 = tmp1_0*w73; |
2199 |
|
const register double tmp5_1 = Y_2*w74; |
2200 |
|
const register double tmp7_1 = Y_1*w74; |
2201 |
|
const register double tmp6_1 = Y_2*w72; |
2202 |
|
const register double tmp2_1 = Y_3*w74; |
2203 |
|
const register double tmp8_1 = Y_3*w72; |
2204 |
|
const register double tmp3_1 = Y_1*w72; |
2205 |
|
const register double tmp0_1 = Y_0*w72; |
2206 |
|
const register double tmp1_1 = tmp0_0*w73; |
2207 |
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; |
2208 |
|
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; |
2209 |
|
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; |
2210 |
|
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; |
2211 |
|
} else { /* constant data */ |
2212 |
|
const register double Y_0 = Y_p[0]; |
2213 |
|
const register double tmp0_1 = Y_0*w75; |
2214 |
|
EM_F[0]+=tmp0_1; |
2215 |
|
EM_F[1]+=tmp0_1; |
2216 |
|
EM_F[2]+=tmp0_1; |
2217 |
|
EM_F[3]+=tmp0_1; |
2218 |
|
} |
2219 |
|
} |
2220 |
|
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ |
2221 |
|
|
2222 |
|
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2223 |
|
IndexVector rowIndex; |
2224 |
|
const index_t firstNode=k1*m_N0+k0; |
2225 |
|
rowIndex.push_back(firstNode); |
2226 |
|
rowIndex.push_back(firstNode+1); |
2227 |
|
rowIndex.push_back(firstNode+m_N0); |
2228 |
|
rowIndex.push_back(firstNode+1+m_N0); |
2229 |
|
if (add_EM_S) { |
2230 |
|
addToSystemMatrix(mat, 4, rowIndex, 1, 4, rowIndex, 1, EM_S); |
2231 |
|
} |
2232 |
|
if (add_EM_F) { |
2233 |
|
double *F_p=rhs.getSampleDataRW(0); |
2234 |
|
for (index_t i=0; i<4; i++) { |
2235 |
|
if (rowIndex[i]<getNumNodes()) |
2236 |
|
F_p[rowIndex[i]]+=EM_F[i]; |
2237 |
|
} |
2238 |
|
} |
2239 |
|
} // end k0 loop |
2240 |
|
} // end k1 loop |
2241 |
|
} // end of coloring |
2242 |
|
} // end of parallel region |
2243 |
|
} |
2244 |
|
|
2245 |
|
void Rectangle::addToSystemMatrix(Paso_SystemMatrix* in, dim_t NN_Eq, |
2246 |
|
const IndexVector& nodes_Eq, dim_t num_Eq, dim_t NN_Sol, |
2247 |
|
const IndexVector& nodes_Sol, dim_t num_Sol, const vector<double>& array) const |
2248 |
|
{ |
2249 |
|
const dim_t row_block_size = in->row_block_size; |
2250 |
|
const dim_t col_block_size = in->col_block_size; |
2251 |
|
const dim_t block_size = in->block_size; |
2252 |
|
const dim_t num_subblocks_Eq = num_Eq / row_block_size; |
2253 |
|
const dim_t num_subblocks_Sol = num_Sol / col_block_size; |
2254 |
|
const dim_t numMyCols = in->pattern->mainPattern->numInput; |
2255 |
|
const dim_t numMyRows = in->pattern->mainPattern->numOutput; |
2256 |
|
|
2257 |
|
const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr; |
2258 |
|
const index_t* mainBlock_index = in->mainBlock->pattern->index; |
2259 |
|
double* mainBlock_val = in->mainBlock->val; |
2260 |
|
const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr; |
2261 |
|
const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index; |
2262 |
|
double* col_coupleBlock_val = in->col_coupleBlock->val; |
2263 |
|
const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr; |
2264 |
|
const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index; |
2265 |
|
double* row_coupleBlock_val = in->row_coupleBlock->val; |
2266 |
|
for (dim_t k_Eq = 0; k_Eq < NN_Eq; ++k_Eq) { |
2267 |
|
// down columns of array |
2268 |
|
const dim_t j_Eq = nodes_Eq[k_Eq]; |
2269 |
|
for (dim_t l_row = 0; l_row < num_subblocks_Eq; ++l_row) { |
2270 |
|
const dim_t i_row = j_Eq * num_subblocks_Eq + l_row; |
2271 |
|
// only look at the matrix rows stored on this processor |
2272 |
|
if (i_row < numMyRows) { |
2273 |
|
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) { |
2274 |
|
// across rows of array |
2275 |
|
const dim_t j_Sol = nodes_Sol[k_Sol]; |
2276 |
|
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) { |
2277 |
|
// only look at the matrix rows stored on this processor |
2278 |
|
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col; |
2279 |
|
if (i_col < numMyCols) { |
2280 |
|
for (dim_t k = mainBlock_ptr[i_row]; |
2281 |
|
k < mainBlock_ptr[i_row + 1]; ++k) { |
2282 |
|
if (mainBlock_index[k] == i_col) { |
2283 |
|
// entry array(k_Sol, j_Eq) is a block |
2284 |
|
// (row_block_size x col_block_size) |
2285 |
|
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
2286 |
|
const dim_t i_Sol = ic + col_block_size * l_col; |
2287 |
|
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
2288 |
|
const dim_t i_Eq = ir + row_block_size * l_row; |
2289 |
|
mainBlock_val[k*block_size + ir + row_block_size*ic] += |
2290 |
|
array[INDEX4 |
2291 |
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
2292 |
|
} |
2293 |
|
} |
2294 |
|
break; |
2295 |
|
} |
2296 |
|
} |
2297 |
|
} else { |
2298 |
|
for (dim_t k = col_coupleBlock_ptr[i_row]; |
2299 |
|
k < col_coupleBlock_ptr[i_row + 1]; ++k) { |
2300 |
|
if (col_coupleBlock_index[k] == i_col - numMyCols) { |
2301 |
|
// entry array(k_Sol, j_Eq) is a block |
2302 |
|
// (row_block_size x col_block_size) |
2303 |
|
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
2304 |
|
const dim_t i_Sol = ic + col_block_size * l_col; |
2305 |
|
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
2306 |
|
const dim_t i_Eq = ir + row_block_size * l_row; |
2307 |
|
col_coupleBlock_val[k * block_size + ir + row_block_size * ic] += |
2308 |
|
array[INDEX4 |
2309 |
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
2310 |
|
} |
2311 |
|
} |
2312 |
|
break; |
2313 |
|
} |
2314 |
|
} |
2315 |
|
} |
2316 |
|
} |
2317 |
|
} |
2318 |
|
} else { |
2319 |
|
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) { |
2320 |
|
// across rows of array |
2321 |
|
const dim_t j_Sol = nodes_Sol[k_Sol]; |
2322 |
|
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) { |
2323 |
|
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col; |
2324 |
|
if (i_col < numMyCols) { |
2325 |
|
for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows]; |
2326 |
|
k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k) |
2327 |
|
{ |
2328 |
|
if (row_coupleBlock_index[k] == i_col) { |
2329 |
|
// entry array(k_Sol, j_Eq) is a block |
2330 |
|
// (row_block_size x col_block_size) |
2331 |
|
for (dim_t ic = 0; ic < col_block_size; ++ic) { |
2332 |
|
const dim_t i_Sol = ic + col_block_size * l_col; |
2333 |
|
for (dim_t ir = 0; ir < row_block_size; ++ir) { |
2334 |
|
const dim_t i_Eq = ir + row_block_size * l_row; |
2335 |
|
row_coupleBlock_val[k * block_size + ir + row_block_size * ic] += |
2336 |
|
array[INDEX4 |
2337 |
|
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)]; |
2338 |
|
} |
2339 |
|
} |
2340 |
|
break; |
2341 |
|
} |
2342 |
|
} |
2343 |
|
} |
2344 |
|
} |
2345 |
|
} |
2346 |
|
} |
2347 |
|
} |
2348 |
|
} |
2349 |
|
} |
2350 |
|
|
2351 |
} // end of namespace ripley |
} // end of namespace ripley |
2352 |
|
|