1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright 2003-2007 by ACceSS MNRF |
5 |
* Copyright 2007 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
/* TFQMR iterations */ |
15 |
|
16 |
#include "SystemMatrix.h" |
17 |
#include "Paso.h" |
18 |
#include "Solver.h" |
19 |
#include "PasoUtil.h" |
20 |
|
21 |
#ifdef _OPENMP |
22 |
#include <omp.h> |
23 |
#endif |
24 |
|
25 |
#ifdef PASO_MPI |
26 |
#include <mpi.h> |
27 |
#endif |
28 |
|
29 |
/* |
30 |
* |
31 |
* Purpose |
32 |
* ======= |
33 |
* |
34 |
* TFQMR solves the linear system A*x = b |
35 |
* |
36 |
* Convergence test: norm( b - A*x )< TOL. |
37 |
* For other measures, see the above reference. |
38 |
* |
39 |
* Arguments |
40 |
* ========= |
41 |
* |
42 |
* r (input) DOUBLE PRECISION array, dimension N. |
43 |
* On entry, residual of inital guess x |
44 |
* |
45 |
* x (input/output) DOUBLE PRECISION array, dimension N. |
46 |
* On input, the initial guess. |
47 |
* |
48 |
* ITER (input/output) INT |
49 |
* On input, the maximum iterations to be performed. |
50 |
* On output, actual number of iterations performed. |
51 |
* |
52 |
* INFO (output) INT |
53 |
* |
54 |
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
55 |
* = SOLVEr_MAXITER_REACHED |
56 |
* = SOLVER_INPUT_ERROR Illegal parameter: |
57 |
* = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller |
58 |
* = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller |
59 |
* |
60 |
* ============================================================== |
61 |
*/ |
62 |
|
63 |
/* #define PASO_DYNAMIC_SCHEDULING_MVM */ |
64 |
|
65 |
#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP |
66 |
#define USE_DYNAMIC_SCHEDULING |
67 |
#endif |
68 |
|
69 |
err_t Paso_Solver_TFQMR( |
70 |
Paso_SystemMatrix * A, |
71 |
double * r, |
72 |
double * x, |
73 |
dim_t *iter, |
74 |
double * tolerance, |
75 |
Paso_Performance* pp) { |
76 |
|
77 |
/* Local variables */ |
78 |
|
79 |
int m=1; |
80 |
int j=0; |
81 |
|
82 |
dim_t num_iter=0,maxit; |
83 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
84 |
err_t status = SOLVER_NO_ERROR; |
85 |
dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
86 |
double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ; |
87 |
|
88 |
double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; |
89 |
|
90 |
double norm_of_residual; |
91 |
|
92 |
/* */ |
93 |
/*-----------------------------------------------------------------*/ |
94 |
/* */ |
95 |
/* Start of Calculation : */ |
96 |
/* --------------------- */ |
97 |
/* */ |
98 |
/* */ |
99 |
u1=TMPMEMALLOC(n,double); |
100 |
u2=TMPMEMALLOC(n,double); |
101 |
y1=TMPMEMALLOC(n,double); |
102 |
y2=TMPMEMALLOC(n,double); |
103 |
d=TMPMEMALLOC(n,double); |
104 |
w=TMPMEMALLOC(n,double); |
105 |
v=TMPMEMALLOC(n,double); |
106 |
v_old=TMPMEMALLOC(n,double); |
107 |
|
108 |
|
109 |
tmp=TMPMEMALLOC(n,double); |
110 |
|
111 |
|
112 |
if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { |
113 |
status=SOLVER_MEMORY_ERROR; |
114 |
} |
115 |
|
116 |
maxit = *iter; |
117 |
|
118 |
/* Test the input parameters. */ |
119 |
if (n < 0 || maxit<=0 ) { |
120 |
status=SOLVER_INPUT_ERROR; |
121 |
} |
122 |
|
123 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
124 |
Paso_Solver_solvePreconditioner(A,r,r); |
125 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
126 |
|
127 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
128 |
|
129 |
Paso_zeroes(n,u2); |
130 |
Paso_zeroes(n,y2); |
131 |
|
132 |
Paso_Copy(n,w,r); |
133 |
Paso_Copy(n,y1,r); |
134 |
|
135 |
Paso_zeroes(n,d); |
136 |
|
137 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
138 |
Performance_startMonitor(pp,PERFORMANCE_MVM); |
139 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v); |
140 |
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
141 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
142 |
|
143 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
144 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
145 |
Paso_Solver_solvePreconditioner(A,v,v); |
146 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
147 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
148 |
|
149 |
Paso_Copy(n,u1,v); |
150 |
|
151 |
theta = 0.0; |
152 |
eta = 0.0; |
153 |
|
154 |
tau = Paso_l2(n,r,A->mpi_info); |
155 |
|
156 |
rho = tau * tau; |
157 |
|
158 |
norm_of_residual=tau*sqrt ( m + 1 ); |
159 |
|
160 |
while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) |
161 |
{ |
162 |
|
163 |
|
164 |
sigma=Paso_InnerProduct(n,r,v,A->mpi_info); |
165 |
|
166 |
if (! (breakFlag = (ABS(sigma) == 0.))) { |
167 |
|
168 |
alpha = rho / sigma; |
169 |
|
170 |
for (j=0; j<=1; j=j+1) |
171 |
{ |
172 |
/* Compute y2 and u2 only if you have to */ |
173 |
if ( j == 1 ){ |
174 |
Paso_LinearCombination(n,y2,1.,y1,-alpha,v); |
175 |
|
176 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
177 |
Performance_startMonitor(pp,PERFORMANCE_MVM); |
178 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2); |
179 |
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
180 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
181 |
|
182 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
183 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
184 |
Paso_Solver_solvePreconditioner(A,u2,u2); |
185 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
186 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
187 |
} |
188 |
m = 2 * (num_iter+1) - 2 + (j+1); |
189 |
if (j==0) { |
190 |
Paso_Update(n,1.,w,-alpha,u1); |
191 |
Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); |
192 |
} |
193 |
if (j==1) { |
194 |
Paso_Update(n,1.,w,-alpha,u2); |
195 |
Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); |
196 |
} |
197 |
|
198 |
theta =Paso_l2(n,w,A->mpi_info)/tau; |
199 |
c = 1.0 / sqrt ( 1.0 + theta * theta ); |
200 |
tau = tau * theta * c; |
201 |
eta = c * c * alpha; |
202 |
Paso_Update(n,1.,x,eta,d); |
203 |
} |
204 |
|
205 |
breakFlag = (ABS(rho) == 0); |
206 |
|
207 |
rhon = Paso_InnerProduct(n,r,w,A->mpi_info); |
208 |
beta = rhon / rho; |
209 |
rho = rhon; |
210 |
|
211 |
Paso_LinearCombination(n,y1,1.,w,beta,y2); |
212 |
|
213 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
214 |
Performance_startMonitor(pp,PERFORMANCE_MVM); |
215 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1); |
216 |
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
217 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
218 |
|
219 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
220 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
221 |
Paso_Solver_solvePreconditioner(A,u1,u1); |
222 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
223 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
224 |
|
225 |
Paso_Copy(n,v_old,v); |
226 |
|
227 |
Paso_Update(n,beta,v_old,1,u2); |
228 |
Paso_LinearCombination(n,v,1.,u1,beta,v_old); |
229 |
} |
230 |
maxIterFlag = (num_iter > maxit); |
231 |
norm_of_residual=tau*sqrt ( m + 1 ); |
232 |
convergeFlag=(norm_of_residual<(*tolerance)); |
233 |
|
234 |
|
235 |
if (maxIterFlag) { |
236 |
status = SOLVER_MAXITER_REACHED; |
237 |
} else if (breakFlag) { |
238 |
status = SOLVER_BREAKDOWN; |
239 |
} |
240 |
++(num_iter); |
241 |
} |
242 |
/* end of iteration */ |
243 |
|
244 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
245 |
TMPMEMFREE(u1); |
246 |
TMPMEMFREE(u2); |
247 |
TMPMEMFREE(y1); |
248 |
TMPMEMFREE(y2); |
249 |
TMPMEMFREE(d); |
250 |
TMPMEMFREE(w); |
251 |
TMPMEMFREE(v); |
252 |
TMPMEMFREE(v_old); |
253 |
|
254 |
*iter=num_iter; |
255 |
*tolerance=norm_of_residual; |
256 |
|
257 |
/* End of TFQMR */ |
258 |
return status; |
259 |
} |