1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright 2003-2007 by ACceSS MNRF |
5 |
* Copyright 2007 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
/* TFQMR iterations */ |
15 |
|
16 |
#include "SystemMatrix.h" |
17 |
#include "Paso.h" |
18 |
#include "Solver.h" |
19 |
#include "PasoUtil.h" |
20 |
|
21 |
#ifdef _OPENMP |
22 |
#include <omp.h> |
23 |
#endif |
24 |
|
25 |
#ifdef PASO_MPI |
26 |
#include <mpi.h> |
27 |
#endif |
28 |
|
29 |
/* |
30 |
* |
31 |
* Purpose |
32 |
* ======= |
33 |
* |
34 |
* MINRES solves the linear system A*x = b |
35 |
* |
36 |
* Convergence test: norm( b - A*x )< TOL. |
37 |
* For other measures, see the above reference. |
38 |
* |
39 |
* Arguments |
40 |
* ========= |
41 |
* |
42 |
* r (input) DOUBLE PRECISION array, dimension N. |
43 |
* On entry, residual of inital guess x |
44 |
* |
45 |
* x (input/output) DOUBLE PRECISION array, dimension N. |
46 |
* On input, the initial guess. |
47 |
* |
48 |
* ITER (input/output) INT |
49 |
* On input, the maximum iterations to be performed. |
50 |
* On output, actual number of iterations performed. |
51 |
* |
52 |
* INFO (output) INT |
53 |
* |
54 |
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
55 |
* = SOLVER_MAXITER_REACHED |
56 |
* = SOLVER_INPUT_ERROR Illegal parameter: |
57 |
* = SOLVER_MEMORY_ERROR : |
58 |
* = SOLVER_NEGATIVE_NORM_ERROR : |
59 |
* |
60 |
* ============================================================== |
61 |
*/ |
62 |
|
63 |
/* #define PASO_DYNAMIC_SCHEDULING_MVM */ |
64 |
|
65 |
#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP |
66 |
#define USE_DYNAMIC_SCHEDULING |
67 |
#endif |
68 |
|
69 |
err_t Paso_Solver_MINRES( |
70 |
Paso_SystemMatrix * A, |
71 |
double * r, |
72 |
double * x, |
73 |
dim_t *iter, |
74 |
double *tol, |
75 |
double * tolerance, |
76 |
Paso_Performance* pp) { |
77 |
|
78 |
/* Local variables */ |
79 |
|
80 |
dim_t num_iter=0,maxit; |
81 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
82 |
err_t status = SOLVER_NO_ERROR; |
83 |
dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
84 |
double *w=NULL, *w1=NULL, *w2=NULL, *r1=NULL, *r2=NULL, *y=NULL, *v=NULL; |
85 |
|
86 |
double Anorm,ynorm,oldb,dbar,epsln,phibar,rhs1,rhs2,rnorm,tnorm2,ynorm2,cs,sn,eps,s,alfa,denom,z,beta1,beta; |
87 |
double gmax,gmin,oldeps,delta,gbar,gamma,phi; |
88 |
|
89 |
double norm_of_residual; |
90 |
|
91 |
/* */ |
92 |
/*-----------------------------------------------------------------*/ |
93 |
/* */ |
94 |
/* Start of Calculation : */ |
95 |
/* --------------------- */ |
96 |
/* */ |
97 |
/* */ |
98 |
w=TMPMEMALLOC(n,double); |
99 |
w1=TMPMEMALLOC(n,double); |
100 |
w2=TMPMEMALLOC(n,double); |
101 |
r1=TMPMEMALLOC(n,double); |
102 |
r2=TMPMEMALLOC(n,double); |
103 |
y=TMPMEMALLOC(n,double); |
104 |
v=TMPMEMALLOC(n,double); |
105 |
|
106 |
|
107 |
if (w ==NULL || w1== NULL || w2== NULL || r1 == NULL || r2== NULL || y==NULL || v==NULL ) { |
108 |
status=SOLVER_MEMORY_ERROR; |
109 |
} |
110 |
|
111 |
maxit = *iter; |
112 |
|
113 |
/* Test the input parameters. */ |
114 |
if (n < 0 || maxit<=0 ) { |
115 |
status=SOLVER_INPUT_ERROR; |
116 |
} |
117 |
|
118 |
Paso_Copy(n,r1,r); |
119 |
|
120 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
121 |
Paso_Solver_solvePreconditioner(A,y,r1); |
122 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
123 |
|
124 |
beta1=Paso_InnerProduct(n,r1,y,A->mpi_info); |
125 |
if (beta1<0) { |
126 |
status=SOLVER_NEGATIVE_NORM_ERROR; |
127 |
} |
128 |
|
129 |
if (beta1>0) { |
130 |
beta1=sqrt(beta1); |
131 |
} |
132 |
|
133 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
134 |
|
135 |
Paso_zeroes(n,w); |
136 |
Paso_zeroes(n,w2); |
137 |
|
138 |
Paso_Copy(n,r2,r1); |
139 |
|
140 |
Anorm = 0; |
141 |
ynorm = 0; |
142 |
oldb = 0; |
143 |
beta = beta1; |
144 |
dbar = 0; |
145 |
epsln = 0; |
146 |
phibar = beta1; |
147 |
rhs1 = beta1; |
148 |
rhs2 = 0; |
149 |
rnorm = phibar; |
150 |
tnorm2 = 0; |
151 |
ynorm2 = 0; |
152 |
cs = -1; |
153 |
sn = 0; |
154 |
eps = 0.0001; |
155 |
|
156 |
while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) |
157 |
{ |
158 |
|
159 |
s=1/beta; |
160 |
Paso_Update(n, 0., v, s, y); |
161 |
|
162 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
163 |
Performance_startMonitor(pp,PERFORMANCE_MVM); |
164 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, v,ZERO,y); |
165 |
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
166 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
167 |
|
168 |
if (num_iter >= 1) { |
169 |
Paso_Update(n, 1., y, -(beta/oldb), r1); |
170 |
} |
171 |
|
172 |
alfa = Paso_InnerProduct(n,v,y,A->mpi_info); |
173 |
Paso_Update(n, 1., y, -(alfa/beta), r2); |
174 |
Paso_Copy(n,r1,r2); |
175 |
Paso_Copy(n,r2,y); |
176 |
|
177 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
178 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
179 |
Paso_Solver_solvePreconditioner(A,y,r2); |
180 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
181 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
182 |
|
183 |
oldb = beta; |
184 |
beta = Paso_InnerProduct(n,y,r2,A->mpi_info); |
185 |
if (beta<0) { |
186 |
status=SOLVER_NEGATIVE_NORM_ERROR; |
187 |
} |
188 |
|
189 |
beta = sqrt( beta ); |
190 |
tnorm2 = tnorm2 + alfa*alfa + oldb*oldb + beta*beta; |
191 |
|
192 |
if (num_iter==0) { |
193 |
gmax = ABS(alfa); |
194 |
gmin = gmax; |
195 |
} |
196 |
|
197 |
/* Apply previous rotation Qk-1 to get |
198 |
[deltak epslnk+1] = [cs sn][dbark 0 ] |
199 |
[gbar k dbar k+1] [sn -cs][alfak betak+1]. */ |
200 |
|
201 |
oldeps = epsln; |
202 |
delta = cs * dbar + sn * alfa ; |
203 |
gbar = sn * dbar - cs * alfa ; |
204 |
epsln = sn * beta ; |
205 |
dbar = - cs * beta; |
206 |
|
207 |
gamma = sqrt(gbar*gbar+beta*beta) ; |
208 |
gamma = MAX(gamma,eps) ; |
209 |
cs = gbar / gamma ; |
210 |
sn = beta / gamma ; |
211 |
phi = cs * phibar ; |
212 |
phibar = sn * phibar ; |
213 |
|
214 |
/* Update x. */ |
215 |
|
216 |
denom = 1/gamma ; |
217 |
Paso_Copy(n,w1,w2); |
218 |
Paso_Copy(n,w2,w); |
219 |
|
220 |
Paso_LinearCombination(n,w,denom,v,-(denom*oldeps),w1); |
221 |
Paso_Update(n, 1., w, -(delta*denom), w2) ; |
222 |
Paso_Update(n, 1., x, phi, w) ; |
223 |
|
224 |
/* Go round again. */ |
225 |
|
226 |
gmax = MAX(gmax,gamma); |
227 |
gmin = MIN(gmin,gamma); |
228 |
z = rhs1 / gamma; |
229 |
ynorm2 = z*z + ynorm2; |
230 |
rhs1 = rhs2 - delta*z; |
231 |
rhs2 = - epsln*z; |
232 |
|
233 |
Anorm = sqrt( tnorm2 ) ; |
234 |
ynorm = sqrt( ynorm2 ) ; |
235 |
|
236 |
rnorm = phibar; |
237 |
|
238 |
maxIterFlag = (num_iter > maxit); |
239 |
norm_of_residual=rnorm; |
240 |
convergeFlag=(norm_of_residual<Anorm*ynorm*(*tolerance)); |
241 |
|
242 |
|
243 |
if (maxIterFlag) { |
244 |
status = SOLVER_MAXITER_REACHED; |
245 |
} else if (breakFlag) { |
246 |
status = SOLVER_BREAKDOWN; |
247 |
} |
248 |
++(num_iter); |
249 |
/*printf("residual norm %.10f < %.10f %.10f %.10f \n",rnorm,Anorm*ynorm*(*tolerance), Anorm*ynorm, (*tolerance));*/ |
250 |
} |
251 |
/* end of iteration */ |
252 |
|
253 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
254 |
TMPMEMFREE(w); |
255 |
TMPMEMFREE(w1); |
256 |
TMPMEMFREE(w2); |
257 |
TMPMEMFREE(r1); |
258 |
TMPMEMFREE(r2); |
259 |
TMPMEMFREE(y); |
260 |
TMPMEMFREE(v); |
261 |
|
262 |
*iter=num_iter; |
263 |
*tol=norm_of_residual; |
264 |
|
265 |
/* End of MINRES */ |
266 |
return status; |
267 |
} |