import os

# Thread count used for every benchmark run: it is substituted into the
# setNumberOfThreads() call of the generated program (via HEADER) and is
# exported as OMP_NUM_THREADS before the program is started.
NUM_THREADS=8

# Prefix of the line in the generated program's output that carries the
# per-iteration timing; only lines starting with this text are parsed.
TEST_STR="timing: per iteration step:"

# Number of times each configuration is run; the reported value is the sum
# of the parsed timings divided by this count.
REPEAT=10
# Preamble of every generated benchmark program: imports, the solver and
# tolerance settings read by the test snippets, and the thread count.
# The single %d placeholder is filled with NUM_THREADS by the driver loop.
HEADER="""from esys.escript import *
from esys.finley import Rectangle,Brick
from esys.escript.linearPDEs import LinearPDE
SOLVER_TOL=1.e-4
REL_TOL=1.e-1
OPTIMIZE=False
SOLVER_VERBOSE=True
FAC_DIAG=1.
FAC_OFFDIAG=-0.4

setNumberOfThreads(%d)
"""

# One-line domain constructors appended to the generated program after the
# "NE=..." line.  Naming: DOM_<dimension>_<element order>.  The 2D variants
# build a Rectangle, the 3D variants a Brick; NE and OPTIMIZE are defined
# earlier in the generated program.
DOM_2_1="dom=Rectangle(NE,NE,order=1, useFullElementOrder=False,optimize=OPTIMIZE)"
DOM_2_2="dom=Rectangle(NE,NE,order=2, useFullElementOrder=False,optimize=OPTIMIZE)"
DOM_3_1="dom=Brick(NE,NE,NE,order=1, useFullElementOrder=True,optimize=OPTIMIZE)"
DOM_3_2="dom=Brick(NE,NE,NE,order=2, useFullElementOrder=True,optimize=OPTIMIZE)"

# Program fragment for the 2D single-equation test.  It defines the
# reference solution u_ex = 1 + 2*x[0] + 3*x[1] and its gradient g_ex,
# creates a one-equation LinearPDE, constrains the solution to u_ex where
# x[0] is zero, and sets A=kronecker(2) with boundary term inner(g_ex, normal).
# The first u_ex assignment is immediately overwritten by the second; it is
# kept because the text is emitted verbatim into the generated program.
TEST_2_s="""x=Solution(dom).getX()
u_ex=Scalar(0,Solution(dom))
u_ex=1.+2.*x[0]+3.*x[1]
g_ex=Data(0.,(2,),Solution(dom))
g_ex[0]=2.
g_ex[1]=3.
pde=LinearPDE(dom,numEquations=1)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask)
pde.setValue(A=kronecker(2),y=inner(g_ex,dom.getNormal()))
"""
# Program fragment for the 2D two-equation (vector) test.  It defines a
# two-component reference solution u_ex with gradient g_ex, constrains both
# components where x[0] is zero, and sets A (identity per component), a
# coupling matrix D built from FAC_DIAG/FAC_OFFDIAG, the matching right-hand
# side Y, and the boundary term matrixmult(g_ex, normal).
# NOTE(review): the first two lines are identical; the duplicate is kept
# because the text is emitted verbatim into the generated program.
# NOTE(review): `numarray` is not imported explicitly by HEADER; presumably
# it is provided by the wildcard import from esys.escript -- confirm.
TEST_2_v="""x=Solution(dom).getX()
x=Solution(dom).getX()
u_ex=Vector(0,Solution(dom))
u_ex[0]=1.+2.*x[0]+3.*x[1]
u_ex[1]=-1.+3.*x[0]+2.*x[1]
g_ex=Data(0.,(2,2),Solution(dom))
g_ex[0,0]=2.
g_ex[0,1]=3.
g_ex[1,0]=3.
g_ex[1,1]=2.
pde=LinearPDE(dom,numEquations=2)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask*numarray.ones(2,))
A=Tensor4(0,Function(dom))
A[0,:,0,:]=kronecker(2)
A[1,:,1,:]=kronecker(2)
Y=Vector(0.,Function(dom))
Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
pde.setValue(A=A, D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numarray.ones((2,2))*FAC_OFFDIAG, Y=Y, y=matrixmult(g_ex,dom.getNormal()))
"""

# Program fragment for the 3D single-equation test.  It defines the
# reference solution u_ex = 1 + 2*x[0] + 3*x[1] + 4*x[2] and its gradient
# g_ex, creates a one-equation LinearPDE, constrains the solution to u_ex
# where x[0] is zero, and sets A=kronecker(3) with boundary term
# inner(g_ex, normal).
TEST_3_s="""x=Solution(dom).getX()
u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
g_ex=Data(0.,(3,),Solution(dom))
g_ex[0]=2.
g_ex[1]=3.
g_ex[2]=4.
pde=LinearPDE(dom,numEquations=1)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask)
pde.setValue(A=kronecker(3),y=inner(g_ex,dom.getNormal()))
"""

# Program fragment for the 3D three-equation (vector) test.  It defines a
# three-component reference solution u_ex with gradient g_ex, constrains all
# components where x[0] is zero, and sets A (identity per component), a
# coupling matrix D built from FAC_DIAG/FAC_OFFDIAG, the matching right-hand
# side Y, and the boundary term matrixmult(g_ex, normal).
# NOTE(review): the continuation lines of the final setValue call are kept
# flush-left; any original alignment whitespace is not recoverable here and
# does not matter inside the parentheses.
# NOTE(review): `numarray` is not imported explicitly by HEADER; presumably
# it is provided by the wildcard import from esys.escript -- confirm.
TEST_3_v="""x=Solution(dom).getX()
u_ex=Vector(0,Solution(dom))
u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
g_ex=Data(0.,(3,3),Solution(dom))
g_ex[0,0]=2.
g_ex[0,1]=3.
g_ex[0,2]=4.
g_ex[1,0]=4.
g_ex[1,1]=1.
g_ex[1,2]=-2.
g_ex[2,0]=8.
g_ex[2,1]=4.
g_ex[2,2]=5.
pde=LinearPDE(dom,numEquations=3)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask*numarray.ones(3,))
A=Tensor4(0,Function(dom))
A[0,:,0,:]=kronecker(3)
A[1,:,1,:]=kronecker(3)
A[2,:,2,:]=kronecker(3)
Y=Vector(0.,Function(dom))
Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
pde.setValue(A=A,
D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numarray.ones((3,3))*FAC_OFFDIAG,
Y=Y,
y=matrixmult(g_ex,dom.getNormal()))
"""

# Final program fragment: configures the solver (tolerance SOLVER_TOL, PCG
# with Jacobi preconditioner, PASO package), solves the PDE and raises a
# RuntimeError when the solution is too far from u_ex.
# NOTE(review): `error` is already divided by Lsup(u_ex) but is compared
# against REL_TOL*Lsup(u_ex), so the threshold scales with the solution
# magnitude twice.  Left unchanged because it alters which runs fail --
# confirm the intended criterion.
SOLVE_AND_TEST="""pde.setTolerance(SOLVER_TOL)
pde.setSolverMethod(pde.PCG,pde.JACOBI)
pde.setSolverPackage(pde.PASO)
u=pde.getSolution(verbose=SOLVER_VERBOSE)
error=Lsup(u-u_ex)/Lsup(u_ex)
if error>REL_TOL*Lsup(u_ex): raise RuntimeError("solution error %s is too big."%error)
"""


# Benchmark driver.  For every target problem size n, (element order,
# dimension) pair and PDE type ("s" = single equation, "v" = vector system)
# a stand-alone program is assembled from the template strings, written to
# "__prog", and executed REPEAT times for each PASO_CHUNK_SIZE_MVM value that
# does not exceed n.  The per-iteration timings found in the program output
# are summed and their average over REPEAT is printed next to the chunk size.
#
# NOTE(review): the block nesting was reconstructed from the statement
# syntax; in particular the placement of `prog+="\n"` relative to the
# isinstance() branch should be confirmed against the repository copy (both
# placements behave identically for the integer orders listed here).
for n in [1000, 10000, 50000, 100000]:
# for n in [1000, 10000]:
    for prop in [(1, 2), (2, 2), (1, 3), (2, 3)]:
        for tp in ["s", "v"]:
            # create code:
            prog = HEADER % NUM_THREADS
            dim = prop[1]
            if isinstance(prop[0], int):
                o = prop[0]
                # Elements per direction, chosen so that (o*NE+1)**dim
                # (the value printed as "l=" below) is at least n.
                NE = int(float(n) ** (1. / dim) / o) + 1
                prog += "NE=%d\n" % NE
                if dim == 2:
                    if o == 1:
                        prog += DOM_2_1
                    else:
                        prog += DOM_2_2
                else:
                    if o == 1:
                        prog += DOM_3_1
                    else:
                        prog += DOM_3_2
            # The DOM_* strings carry no trailing newline.
            prog += "\n"
            if dim == 2:
                if tp == "s":
                    prog += TEST_2_s
                else:
                    prog += TEST_2_v
            else:
                if tp == "s":
                    prog += TEST_3_s
                else:
                    prog += TEST_3_v
            print("l= %d, dim= %d, type=%s, order=%s" % ((o * NE + 1) ** dim, dim, tp, o))

            prog += SOLVE_AND_TEST
            # run code:
            # Close the file explicitly so the program text is on disk
            # before the child interpreter reads it.
            prog_file = open("__prog", "w")
            try:
                prog_file.write(prog + "\n")
            finally:
                prog_file.close()
            for CHUNK in [-1, 10, 100, 1000, 10000, 100000]:
                if CHUNK > n:
                    continue
                # The command does not change between repeats.
                command = "export OMP_NUM_THREADS=%d;export PASO_CHUNK_SIZE_MVM=%d; python __prog > __out;" % (NUM_THREADS, CHUNK)
                time_per_iter = 0
                for run in range(REPEAT):
                    # The exit status is deliberately ignored, as before:
                    # a failing run simply contributes whatever timing
                    # lines it printed.
                    os.system(command)
                    out_file = open("__out", "r")
                    try:
                        out = out_file.read()
                    finally:
                        out_file.close()
                    for line in out.split("\n"):
                        if line.startswith(TEST_STR):
                            # The slice drops the last three characters of
                            # the line (presumably a unit suffix -- confirm
                            # against the solver's output format).
                            time_per_iter += float(line[len(TEST_STR):-3].strip())
                print("%s %s" % (CHUNK, time_per_iter / REPEAT))