1 |
artak |
1703 |
|
2 |
|
|
/******************************************************* |
3 |
|
|
* |
4 |
|
|
* Copyright 2003-2007 by ACceSS MNRF |
5 |
|
|
* Copyright 2007 by University of Queensland |
6 |
|
|
* |
7 |
|
|
* http://esscc.uq.edu.au |
8 |
|
|
* Primary Business: Queensland, Australia |
9 |
|
|
* Licensed under the Open Software License version 3.0 |
10 |
|
|
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
|
|
* |
12 |
|
|
*******************************************************/ |
13 |
|
|
|
14 |
|
|
/* TFQMR iterations */ |
15 |
|
|
|
16 |
|
|
#include "SystemMatrix.h" |
17 |
|
|
#include "Paso.h" |
18 |
|
|
#include "Solver.h" |
19 |
|
|
#include "PasoUtil.h" |
20 |
|
|
|
21 |
|
|
#ifdef _OPENMP |
22 |
|
|
#include <omp.h> |
23 |
|
|
#endif |
24 |
|
|
|
25 |
|
|
#ifdef PASO_MPI |
26 |
|
|
#include <mpi.h> |
27 |
|
|
#endif |
28 |
|
|
|
29 |
|
|
/* |
30 |
|
|
* |
31 |
|
|
* Purpose |
32 |
|
|
* ======= |
33 |
|
|
* |
34 |
|
|
* TFQMR solves the linear system A*x = b |
35 |
|
|
* |
36 |
|
|
* Convergence test: norm( b - A*x )< TOL. |
37 |
|
|
* For other measures, see the above reference. |
38 |
|
|
* |
39 |
|
|
* Arguments |
40 |
|
|
* ========= |
41 |
|
|
* |
42 |
|
|
* r (input) DOUBLE PRECISION array, dimension N. |
43 |
|
|
* On entry, residual of inital guess x |
44 |
|
|
* |
45 |
|
|
* x (input/output) DOUBLE PRECISION array, dimension N. |
46 |
|
|
* On input, the initial guess. |
47 |
|
|
* |
48 |
|
|
* ITER (input/output) INT |
49 |
|
|
* On input, the maximum iterations to be performed. |
50 |
|
|
* On output, actual number of iterations performed. |
51 |
|
|
* |
52 |
|
|
* INFO (output) INT |
53 |
|
|
* |
54 |
|
|
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
55 |
|
|
* = SOLVEr_MAXITER_REACHED |
56 |
|
|
* = SOLVER_INPUT_ERROR Illegal parameter: |
57 |
|
|
* = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller |
58 |
|
|
* = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller |
59 |
|
|
* |
60 |
|
|
* ============================================================== |
61 |
|
|
*/ |
62 |
|
|
|
63 |
|
|
/* #define PASO_DYNAMIC_SCHEDULING_MVM */ |
64 |
|
|
|
65 |
|
|
#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP |
66 |
|
|
#define USE_DYNAMIC_SCHEDULING |
67 |
|
|
#endif |
68 |
|
|
|
69 |
|
|
err_t Paso_Solver_TFQMR( |
70 |
|
|
Paso_SystemMatrix * A, |
71 |
|
|
double * r, |
72 |
|
|
double * x, |
73 |
|
|
dim_t *iter, |
74 |
|
|
double * tolerance, |
75 |
|
|
Paso_Performance* pp) { |
76 |
|
|
|
77 |
|
|
/* Local variables */ |
78 |
artak |
1706 |
|
79 |
|
|
int m=1; |
80 |
|
|
int j=0; |
81 |
|
|
|
82 |
artak |
1703 |
dim_t num_iter=0,maxit; |
83 |
|
|
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
84 |
|
|
err_t status = SOLVER_NO_ERROR; |
85 |
|
|
dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
86 |
|
|
double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ; |
87 |
|
|
|
88 |
|
|
double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; |
89 |
|
|
|
90 |
|
|
double norm_of_residual; |
91 |
artak |
1706 |
|
92 |
artak |
1703 |
/* */ |
93 |
|
|
/*-----------------------------------------------------------------*/ |
94 |
|
|
/* */ |
95 |
|
|
/* Start of Calculation : */ |
96 |
|
|
/* --------------------- */ |
97 |
|
|
/* */ |
98 |
|
|
/* */ |
99 |
|
|
u1=TMPMEMALLOC(n,double); |
100 |
|
|
u2=TMPMEMALLOC(n,double); |
101 |
|
|
y1=TMPMEMALLOC(n,double); |
102 |
|
|
y2=TMPMEMALLOC(n,double); |
103 |
|
|
d=TMPMEMALLOC(n,double); |
104 |
|
|
w=TMPMEMALLOC(n,double); |
105 |
|
|
v=TMPMEMALLOC(n,double); |
106 |
|
|
v_old=TMPMEMALLOC(n,double); |
107 |
|
|
|
108 |
|
|
|
109 |
|
|
tmp=TMPMEMALLOC(n,double); |
110 |
|
|
|
111 |
|
|
|
112 |
|
|
if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { |
113 |
|
|
status=SOLVER_MEMORY_ERROR; |
114 |
|
|
} |
115 |
|
|
|
116 |
|
|
maxit = *iter; |
117 |
|
|
|
118 |
|
|
/* Test the input parameters. */ |
119 |
|
|
if (n < 0 || maxit<=0 ) { |
120 |
|
|
status=SOLVER_INPUT_ERROR; |
121 |
|
|
} |
122 |
|
|
|
123 |
|
|
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
124 |
|
|
Paso_Solver_solvePreconditioner(A,r,r); |
125 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
126 |
|
|
|
127 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
128 |
|
|
|
129 |
|
|
Paso_zeroes(n,u2); |
130 |
|
|
Paso_zeroes(n,y2); |
131 |
|
|
|
132 |
|
|
Paso_Copy(n,w,r); |
133 |
|
|
Paso_Copy(n,y1,r); |
134 |
|
|
|
135 |
|
|
Paso_zeroes(n,d); |
136 |
|
|
|
137 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
138 |
|
|
Performance_startMonitor(pp,PERFORMANCE_MVM); |
139 |
|
|
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v); |
140 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
141 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
142 |
|
|
|
143 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
144 |
|
|
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
145 |
|
|
Paso_Solver_solvePreconditioner(A,v,v); |
146 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
147 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
148 |
|
|
|
149 |
|
|
Paso_Copy(n,u1,v); |
150 |
|
|
|
151 |
|
|
theta = 0.0; |
152 |
|
|
eta = 0.0; |
153 |
|
|
|
154 |
|
|
tau = Paso_l2(n,r,A->mpi_info); |
155 |
|
|
|
156 |
|
|
rho = tau * tau; |
157 |
artak |
1706 |
|
158 |
artak |
1703 |
norm_of_residual=tau*sqrt ( m + 1 ); |
159 |
|
|
|
160 |
|
|
while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) |
161 |
|
|
{ |
162 |
|
|
|
163 |
|
|
|
164 |
|
|
sigma=Paso_InnerProduct(n,r,v,A->mpi_info); |
165 |
|
|
|
166 |
|
|
if (! (breakFlag = (ABS(sigma) == 0.))) { |
167 |
|
|
|
168 |
|
|
alpha = rho / sigma; |
169 |
|
|
|
170 |
|
|
for (j=0; j<=1; j=j+1) |
171 |
|
|
{ |
172 |
|
|
/* Compute y2 and u2 only if you have to */ |
173 |
|
|
if ( j == 1 ){ |
174 |
|
|
Paso_LinearCombination(n,y2,1.,y1,-alpha,v); |
175 |
|
|
|
176 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
177 |
|
|
Performance_startMonitor(pp,PERFORMANCE_MVM); |
178 |
|
|
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2); |
179 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
180 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
181 |
|
|
|
182 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
183 |
|
|
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
184 |
|
|
Paso_Solver_solvePreconditioner(A,u2,u2); |
185 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
186 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
187 |
|
|
} |
188 |
|
|
m = 2 * (num_iter+1) - 2 + (j+1); |
189 |
|
|
if (j==0) { |
190 |
|
|
Paso_Update(n,1.,w,-alpha,u1); |
191 |
|
|
Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); |
192 |
|
|
} |
193 |
|
|
if (j==1) { |
194 |
|
|
Paso_Update(n,1.,w,-alpha,u2); |
195 |
|
|
Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); |
196 |
|
|
} |
197 |
|
|
|
198 |
|
|
theta =Paso_l2(n,w,A->mpi_info)/tau; |
199 |
|
|
c = 1.0 / sqrt ( 1.0 + theta * theta ); |
200 |
|
|
tau = tau * theta * c; |
201 |
|
|
eta = c * c * alpha; |
202 |
|
|
Paso_Update(n,1.,x,eta,d); |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
breakFlag = (ABS(rho) == 0); |
206 |
|
|
|
207 |
|
|
rhon = Paso_InnerProduct(n,r,w,A->mpi_info); |
208 |
|
|
beta = rhon / rho; |
209 |
|
|
rho = rhon; |
210 |
|
|
|
211 |
|
|
Paso_LinearCombination(n,y1,1.,w,beta,y2); |
212 |
|
|
|
213 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
214 |
|
|
Performance_startMonitor(pp,PERFORMANCE_MVM); |
215 |
|
|
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1); |
216 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
217 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
218 |
|
|
|
219 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
220 |
|
|
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
221 |
|
|
Paso_Solver_solvePreconditioner(A,u1,u1); |
222 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
223 |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
224 |
|
|
|
225 |
|
|
Paso_Copy(n,v_old,v); |
226 |
|
|
|
227 |
|
|
Paso_Update(n,beta,v_old,1,u2); |
228 |
|
|
Paso_LinearCombination(n,v,1.,u1,beta,v_old); |
229 |
|
|
} |
230 |
|
|
maxIterFlag = (num_iter > maxit); |
231 |
|
|
norm_of_residual=tau*sqrt ( m + 1 ); |
232 |
|
|
convergeFlag=(norm_of_residual<(*tolerance)); |
233 |
|
|
|
234 |
|
|
|
235 |
|
|
if (maxIterFlag) { |
236 |
|
|
status = SOLVER_MAXITER_REACHED; |
237 |
|
|
} else if (breakFlag) { |
238 |
|
|
status = SOLVER_BREAKDOWN; |
239 |
|
|
} |
240 |
|
|
++(num_iter); |
241 |
|
|
} |
242 |
|
|
/* end of iteration */ |
243 |
|
|
|
244 |
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
245 |
|
|
TMPMEMFREE(u1); |
246 |
|
|
TMPMEMFREE(u2); |
247 |
|
|
TMPMEMFREE(y1); |
248 |
|
|
TMPMEMFREE(y2); |
249 |
|
|
TMPMEMFREE(d); |
250 |
|
|
TMPMEMFREE(w); |
251 |
|
|
TMPMEMFREE(v); |
252 |
|
|
TMPMEMFREE(v_old); |
253 |
|
|
|
254 |
|
|
*iter=num_iter; |
255 |
|
|
*tolerance=norm_of_residual; |
256 |
|
|
|
257 |
|
|
/* End of TFQMR */ |
258 |
|
|
return status; |
259 |
ksteube |
1708 |
} |