import os

# Number of threads: substituted into the setNumberOfThreads(%d) call of every
# generated program and exported as OMP_NUM_THREADS for every benchmark run.
NUM_THREADS=8
# Prefix of the output lines from which the driver loop extracts a timing value.
TEST_STR="timing: per iteration step:"
# Number of times each generated program is executed; the accumulated timing
# is divided by this value before it is printed.
REPEAT=10
# Preamble placed at the top of every generated benchmark program: the escript
# imports plus the settings the other program templates refer to by name
# (SOLVER_TOL, REL_TOL, OPTIMIZE, SOLVER_VERBOSE, FAC_DIAG, FAC_OFFDIAG).
# The single %d placeholder receives the thread count (HEADER%NUM_THREADS).
HEADER="""from esys.escript import *
from esys.finley import Rectangle,Brick
from esys.escript.linearPDEs import LinearPDE
SOLVER_TOL=1.e-2
REL_TOL=1.
OPTIMIZE=False
SOLVER_VERBOSE=True
FAC_DIAG=1.
FAC_OFFDIAG=-0.4

setNumberOfThreads(%d)
"""
# Domain constructors for the generated programs, named DOM_<dim>_<order>.
# Each is a single statement that binds `dom`; it expects NE (written by the
# driver loop) and OPTIMIZE (set in HEADER) to be defined already.
DOM_2_1="dom=Rectangle(NE,NE,order=1, useFullElementOrder=False,optimize=OPTIMIZE)"
DOM_2_2="dom=Rectangle(NE,NE,order=2, useFullElementOrder=False,optimize=OPTIMIZE)"
DOM_3_1="dom=Brick(NE,NE,NE,order=1, useFullElementOrder=True,optimize=OPTIMIZE)"
DOM_3_2="dom=Brick(NE,NE,NE,order=2, useFullElementOrder=True,optimize=OPTIMIZE)"
# Program fragment: single-equation PDE (numEquations=1) on a 2D domain.
# Expects `dom` to be defined; defines `pde` and the reference solution `u_ex`
# that SOLVE_AND_TEST uses. The value of u_ex is constrained where x[0] is zero.
# The former `u_ex=Scalar(0,Solution(dom))` line was dropped: it was overwritten
# by the very next statement and has no counterpart in TEST_3_s.
TEST_2_s="""x=Solution(dom).getX()
u_ex=1.+2.*x[0]+3.*x[1]
g_ex=Data(0.,(2,),Solution(dom))
g_ex[0]=2.
g_ex[1]=3.
pde=LinearPDE(dom,numEquations=1)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask)
pde.setValue(A=kronecker(2),y=inner(g_ex,dom.getNormal()))
"""
# Program fragment: two-equation PDE system (numEquations=2) on a 2D domain.
# Expects `dom`, FAC_DIAG and FAC_OFFDIAG to be defined; defines `pde` and the
# reference solution `u_ex` that SOLVE_AND_TEST uses. D couples the two
# components: FAC_DIAG on the diagonal, FAC_OFFDIAG off the diagonal.
# The duplicated `x=Solution(dom).getX()` statement is emitted only once.
TEST_2_v="""x=Solution(dom).getX()
u_ex=Vector(0,Solution(dom))
u_ex[0]=1.+2.*x[0]+3.*x[1]
u_ex[1]=-1.+3.*x[0]+2.*x[1]
g_ex=Data(0.,(2,2),Solution(dom))
g_ex[0,0]=2.
g_ex[0,1]=3.
g_ex[1,0]=3.
g_ex[1,1]=2.
pde=LinearPDE(dom,numEquations=2)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask*numarray.ones(2,))
A=Tensor4(0,Function(dom))
A[0,:,0,:]=kronecker(2)
A[1,:,1,:]=kronecker(2)
Y=Vector(0.,Function(dom))
Y[0]=u_ex[0]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG
Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG
pde.setValue(A=A, D=kronecker(2)*(FAC_DIAG-FAC_OFFDIAG)+numarray.ones((2,2))*FAC_OFFDIAG, Y=Y, y=matrixmult(g_ex,dom.getNormal()))
"""
# Program fragment: single-equation PDE (numEquations=1) on a 3D domain.
# Expects `dom` to be defined; defines `pde` and the reference solution `u_ex`
# that SOLVE_AND_TEST uses. The value of u_ex is constrained where x[0] is zero.
TEST_3_s="""x=Solution(dom).getX()
u_ex=1.+2.*x[0]+3.*x[1]+4.*x[2]
g_ex=Data(0.,(3,),Solution(dom))
g_ex[0]=2.
g_ex[1]=3.
g_ex[2]=4.
pde=LinearPDE(dom,numEquations=1)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask)
pde.setValue(A=kronecker(3),y=inner(g_ex,dom.getNormal()))
"""
# Program fragment: three-equation PDE system (numEquations=3) on a 3D domain.
# Expects `dom`, FAC_DIAG and FAC_OFFDIAG to be defined; defines `pde` and the
# reference solution `u_ex` that SOLVE_AND_TEST uses. D couples the three
# components: FAC_DIAG on the diagonal, FAC_OFFDIAG off the diagonal.
TEST_3_v="""x=Solution(dom).getX()
u_ex=Vector(0,Solution(dom))
u_ex[0]=1.+2.*x[0]+3.*x[1]+4.*x[2]
u_ex[1]=-1.+4.*x[0]+1.*x[1]-2.*x[2]
u_ex[2]=5.+8.*x[0]+4.*x[1]+5.*x[2]
g_ex=Data(0.,(3,3),Solution(dom))
g_ex[0,0]=2.
g_ex[0,1]=3.
g_ex[0,2]=4.
g_ex[1,0]=4.
g_ex[1,1]=1.
g_ex[1,2]=-2.
g_ex[2,0]=8.
g_ex[2,1]=4.
g_ex[2,2]=5.
pde=LinearPDE(dom,numEquations=3)
mask=whereZero(x[0])
pde.setValue(r=u_ex,q=mask*numarray.ones(3,))
A=Tensor4(0,Function(dom))
A[0,:,0,:]=kronecker(3)
A[1,:,1,:]=kronecker(3)
A[2,:,2,:]=kronecker(3)
Y=Vector(0.,Function(dom))
Y[0]=u_ex[0]*FAC_DIAG+u_ex[2]*FAC_OFFDIAG+u_ex[1]*FAC_OFFDIAG
Y[1]=u_ex[1]*FAC_DIAG+u_ex[0]*FAC_OFFDIAG+u_ex[2]*FAC_OFFDIAG
Y[2]=u_ex[2]*FAC_DIAG+u_ex[1]*FAC_OFFDIAG+u_ex[0]*FAC_OFFDIAG
pde.setValue(A=A,
             D=kronecker(3)*(FAC_DIAG-FAC_OFFDIAG)+numarray.ones((3,3))*FAC_OFFDIAG,
             Y=Y,
             y=matrixmult(g_ex,dom.getNormal()))
"""
# Program fragment appended after one of the TEST_* fragments: configures the
# solver (PCG method with JACOBI, PASO package), solves `pde`, and raises
# RuntimeError in the generated program when the solution is too far from u_ex.
# `error` is already relative (divided by Lsup(u_ex)), so it is compared with
# REL_TOL directly; the earlier comparison against REL_TOL*Lsup(u_ex) applied
# the normalisation twice.
SOLVE_AND_TEST="""pde.setTolerance(SOLVER_TOL)
pde.setSolverMethod(pde.PCG,pde.JACOBI)
pde.setSolverPackage(pde.PASO)
u=pde.getSolution(verbose=SOLVER_VERBOSE)
error=Lsup(u-u_ex)/Lsup(u_ex)
if error>REL_TOL: raise RuntimeError("solution error %s is too big."%error)
"""
# Benchmark driver.
# For every target size n, every (element order, spatial dimension) pair and
# every PDE type ("s": one equation, "v": one equation per dimension) a program
# is assembled from the templates above, written to the file __prog and run
# REPEAT times. The timing values found in its output are summed, divided by
# REPEAT and printed together with the two chunk-size settings.
#for n in [10000, 50000, 100000]:
for n in [100000]:
# for n in [1000, 10000]:
    #for prop in [ (1,2), (2,2), (1,3), (2,3) ]:
    for prop in [ (1,2), (1,3) ]:
        for tp in [ "s", "v" ]:
            # create code:
            prog=HEADER%NUM_THREADS
            o,dim=prop
            if not isinstance(o, int):
                # only integer element orders are handled by the templates
                raise TypeError("unsupported element order %r"%(o,))
            # q: number of equations in the PDE
            if tp=="s":
                q=1
            else:
                q=dim
            # NE: elements per direction, sized so that the count
            # q*(o*NE+1)**dim printed below comes out close to n.
            # n//q is explicit floor division (what n/q did for Python 2 ints).
            NE=int(float(n//q-1)**(1./dim)/o+0.5)
            prog+="NE=%d\n"%NE
            if dim==2:
                if o==1:
                    prog+=DOM_2_1
                else:
                    prog+=DOM_2_2
            else:
                if o==1:
                    prog+=DOM_3_1
                else:
                    prog+=DOM_3_2
            prog+="\n"
            if dim==2:
                if tp=="s":
                    prog+=TEST_2_s
                else:
                    prog+=TEST_2_v
            else:
                if tp=="s":
                    prog+=TEST_3_s
                else:
                    prog+=TEST_3_v
            print("l= %d, dim= %d, type=%s, order=%s"%(q*(o*NE+1)**dim,dim,tp,o))

            prog+=SOLVE_AND_TEST
            # run code:
            # The file is closed explicitly so the complete program is on disk
            # before the child interpreter is started.
            prog_file=open("__prog","w")
            try:
                prog_file.write(prog+"\n")
            finally:
                prog_file.close()
            # activate for dynamic
            # for CHUNK in [1,10,100,1000,10000, 100000]:
            #     for CHUNK_PCG in [1,10,100,1000,10000, 100000]:
            # activate for static
            for CHUNK in [-1]:
                for CHUNK_PCG in [-1]:
                    if CHUNK*NUM_THREADS <= n and CHUNK_PCG*NUM_THREADS <=n:
                        time_per_iter=0.
                        for run in range(REPEAT):
                            status=os.system("export OMP_NUM_THREADS=%d;export PASO_CHUNK_SIZE_MVM=%d; export PASO_CHUNK_SIZE_PCG=%d; python __prog > __out;"%(NUM_THREADS,CHUNK,CHUNK_PCG))
                            if status!=0:
                                # a failed run would otherwise be averaged in silently
                                raise RuntimeError("benchmark run of __prog failed (exit status %s)."%status)
                            out_file=open("__out","r")
                            try:
                                out=out_file.read()
                            finally:
                                out_file.close()
                            for line in out.split("\n"):
                                if line.startswith(TEST_STR):
                                    # The last three characters are cut off before
                                    # parsing -- presumably a unit suffix; confirm
                                    # against the solver's output format.
                                    time_per_iter+=float(line[len(TEST_STR):-3].strip())
                        print("%s %s %s"%(CHUNK,CHUNK_PCG,time_per_iter/REPEAT))