40 |
* Arguments |
* Arguments |
41 |
* ========= |
* ========= |
42 |
* |
* |
43 |
* r (input) DOUBLE PRECISION array, dimension N. |
* R (input) DOUBLE PRECISION array, dimension N. |
44 |
* On entry, residual of inital guess x |
* On entry, residual of inital guess x |
45 |
* |
* |
46 |
* x (input/output) DOUBLE PRECISION array, dimension N. |
* X (input/output) DOUBLE PRECISION array, dimension N. |
47 |
* On input, the initial guess. |
* On input, the initial guess. |
48 |
* |
* |
49 |
* ITER (input/output) INT |
* ITER (input/output) INT |
61 |
* ============================================================== |
* ============================================================== |
62 |
*/ |
*/ |
63 |
|
|
|
/* #define PASO_DYNAMIC_SCHEDULING_MVM */ |
|
64 |
|
|
|
#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP |
|
|
#define USE_DYNAMIC_SCHEDULING |
|
|
#endif |
|
65 |
|
|
66 |
err_t Paso_Solver_MINRES( |
err_t Paso_Solver_MINRES( |
67 |
Paso_SystemMatrix * A, |
Paso_SystemMatrix * A, |
68 |
double * r, |
double * R, |
69 |
double * x, |
double * X, |
70 |
dim_t *iter, |
dim_t *iter, |
71 |
double * tolerance, |
double * tolerance, |
72 |
Paso_Performance* pp) { |
Paso_Performance* pp) |
73 |
|
{ |
74 |
|
|
75 |
/* Local variables */ |
double delta,gamma=0.,gamma_old=0.,eta=0.,dp0=0., c=1.0,c_old=1.0,c_ancient=1.,s=0.0,s_old=0.0,s_ancient, norm_of_residual=0., rnorm_prec=1; |
76 |
|
double tol=1., norm_scal=1.; |
77 |
dim_t num_iter=0,maxit; |
const dim_t maxit = *iter; |
78 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
double alpha_0,alpha_1,alpha_2,alpha_3,dp = 0.0; |
79 |
err_t status = SOLVER_NO_ERROR; |
dim_t num_iter = 0; |
80 |
dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
double *Z=NULL, *W=NULL, *AZ=NULL, *R_old=NULL, *R_ancient=NULL, *W_old=NULL, *W_ancient=NULL, *ZNEW=NULL; |
81 |
double *w=NULL, *w1=NULL, *w2=NULL, *r1=NULL, *r2=NULL, *y=NULL, *v=NULL; |
const dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
82 |
|
bool_t convergeFlag=FALSE; |
83 |
double Anorm,Arnorm,ynorm,oldb,dbar,epsln,phibar,rhs1,rhs2,rnorm,tnorm2,ynorm2,cs,sn,eps,s,alfa,denom,z,beta1,beta; |
err_t status = SOLVER_NO_ERROR; |
84 |
double gmax,gmin,oldeps,delta,gbar,gamma,phi,root,epsx; |
/* |
85 |
|
* |
86 |
double norm_of_residual=0; |
* Start of Calculation : |
87 |
|
* --------------------- |
88 |
/* */ |
* |
89 |
/*-----------------------------------------------------------------*/ |
* */ |
90 |
/* */ |
|
91 |
/* Start of Calculation : */ |
|
92 |
/* --------------------- */ |
/* Test the input parameters. */ |
93 |
/* */ |
if (n < 0 || maxit<=0 ) { |
94 |
/* */ |
status=SOLVER_INPUT_ERROR; |
95 |
w=TMPMEMALLOC(n,double); |
} |
96 |
w1=TMPMEMALLOC(n,double); |
|
97 |
w2=TMPMEMALLOC(n,double); |
ZNEW = TMPMEMALLOC(n,double); |
98 |
r1=TMPMEMALLOC(n,double); |
Z = TMPMEMALLOC(n,double); |
99 |
r2=TMPMEMALLOC(n,double); |
AZ = TMPMEMALLOC(n,double); |
100 |
y=TMPMEMALLOC(n,double); |
W = TMPMEMALLOC(n,double); |
101 |
v=TMPMEMALLOC(n,double); |
R_old = TMPMEMALLOC(n,double); |
102 |
|
W_old = TMPMEMALLOC(n,double); |
103 |
|
R_ancient = TMPMEMALLOC(n,double); |
104 |
if (w ==NULL || w1== NULL || w2== NULL || r1 == NULL || r2== NULL || y==NULL || v==NULL ) { |
W_ancient = TMPMEMALLOC(n,double); |
105 |
status=SOLVER_MEMORY_ERROR; |
|
106 |
} |
if (R_ancient==NULL || Z==NULL || W==NULL || AZ==NULL || R_old==NULL || W_old==NULL || W_ancient==NULL || ZNEW==NULL) { |
107 |
|
status=SOLVER_MEMORY_ERROR; |
108 |
maxit = *iter; |
} |
109 |
|
|
110 |
/* Test the input parameters. */ |
if (status ==SOLVER_NO_ERROR) { |
111 |
if (n < 0 || maxit<=0 ) { |
|
112 |
status=SOLVER_INPUT_ERROR; |
Paso_SystemMatrix_solvePreconditioner(A, Z, R); /* z <- Prec*r */ |
113 |
} |
/* gamma <- r'*z */ |
114 |
|
dp=Paso_InnerProduct(n, R ,Z,A->mpi_info); /* gamma <- r'*z */ |
115 |
Paso_Copy(n,r1,r); |
dp0=dp; |
116 |
|
if (dp<0) { |
117 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
status=SOLVER_NEGATIVE_NORM_ERROR; |
118 |
Paso_Solver_solvePreconditioner(A,y,r1); |
} else if (! ABS(dp)>0) { |
119 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
convergeFlag = TRUE; /* happy break down */ |
120 |
|
} else { |
121 |
beta1=Paso_InnerProduct(n,r1,y,A->mpi_info); |
gamma = sqrt(dp); /* gamma <- sqrt(r'*z) */ |
122 |
if (beta1<0) { |
eta = gamma; |
123 |
status=SOLVER_NEGATIVE_NORM_ERROR; |
rnorm_prec = gamma; |
124 |
} |
norm_of_residual=Paso_l2(n, R, A->mpi_info); |
125 |
|
norm_scal=rnorm_prec/norm_of_residual; |
126 |
if (beta1>0) { |
tol=(*tolerance)*norm_scal; |
127 |
beta1=sqrt(beta1); |
} |
128 |
} |
} |
129 |
|
while (!(convergeFlag || (status !=SOLVER_NO_ERROR) )) |
130 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
{ |
131 |
|
/* z <- z / gamma */ |
132 |
Paso_zeroes(n,w); |
Paso_Scale(n, Z,1./gamma); |
133 |
Paso_zeroes(n,w2); |
|
134 |
|
/* Az <- A*z */ |
135 |
Paso_zeroes(n,x); |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, Z,PASO_ZERO,AZ); |
136 |
|
|
137 |
Paso_Copy(n,r2,r1); |
/* delta <- Az'.z */ |
138 |
|
delta=Paso_InnerProduct(n,AZ,Z,A->mpi_info); |
139 |
Anorm = 0; |
|
140 |
ynorm = 0; |
/* r_new <- Az-delta/gamma * r - gamma/gamma_old r_old */ |
141 |
oldb = 0; |
if (num_iter>0) Paso_Copy(n, R_ancient, R_old); /* r__ancient <- r_old */ |
142 |
beta = beta1; |
Paso_Copy(n, R_old, R); /* r_old <- r */ |
143 |
dbar = 0; |
Paso_Copy(n, R, AZ); /* r <- Az */ |
144 |
epsln = 0; |
Paso_AXPY(n, R, -delta/gamma, R_old); /* r <- r - delta/gamma v */ |
145 |
phibar = beta1; |
if (num_iter>0) Paso_AXPY(n, R, -gamma/gamma_old, R_ancient); /* r <- r - gamma/gamma_old r__ancient */ |
|
rhs1 = beta1; |
|
|
rhs2 = 0; |
|
|
rnorm = phibar; |
|
|
tnorm2 = 0; |
|
|
ynorm2 = 0; |
|
|
cs = -1; |
|
|
sn = 0; |
|
|
eps = 0.000001; |
|
|
|
|
|
while (!(convergeFlag || (status !=SOLVER_NO_ERROR) )) |
|
|
{ |
|
|
|
|
|
s=1/beta; |
|
|
Paso_Update(n, 0., v, s, y); |
|
|
|
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
|
|
Performance_startMonitor(pp,PERFORMANCE_MVM); |
|
|
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, v,PASO_ZERO,y); |
|
|
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
|
|
|
|
|
if (num_iter >= 1) { |
|
|
Paso_Update(n, 1., y, -(beta/oldb), r1); |
|
|
} |
|
|
|
|
|
alfa = Paso_InnerProduct(n,v,y,A->mpi_info); |
|
|
Paso_Update(n, 1., y, (-alfa/beta), r2); |
|
|
Paso_Copy(n,r1,r2); |
|
|
Paso_Copy(n,r2,y); |
|
|
|
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
|
|
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
|
|
Paso_Solver_solvePreconditioner(A,y,r2); |
|
|
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
|
|
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
|
|
|
|
|
oldb = beta; |
|
|
beta = Paso_InnerProduct(n,y,r2,A->mpi_info); |
|
|
if (beta<0) { |
|
|
status=SOLVER_NEGATIVE_NORM_ERROR; |
|
|
} |
|
146 |
|
|
147 |
beta = sqrt( beta ); |
/* z <- prec*r */ |
148 |
tnorm2 = tnorm2 + alfa*alfa + oldb*oldb + beta*beta; |
Paso_SystemMatrix_solvePreconditioner(A, ZNEW, R); |
149 |
|
|
150 |
if (num_iter==0) { |
|
151 |
gmax = ABS(alfa); |
dp=Paso_InnerProduct(n,R,ZNEW,A->mpi_info); |
152 |
gmin = gmax; |
if (dp < 0.) { |
153 |
} |
status=SOLVER_NEGATIVE_NORM_ERROR; |
154 |
|
} else if (ABS(dp) == 0.) { |
155 |
/* Apply previous rotation Qk-1 to get |
convergeFlag = TRUE; /* happy break down */ |
156 |
[deltak epslnk+1] = [cs sn][dbark 0 ] |
} else if (ABS(dp) > 0.e-13 * ABS(dp0) ) { |
157 |
[gbar k dbar k+1] [sn -cs][alfak betak+1]. */ |
/* gamma <- sqrt(r'*z) */ |
158 |
|
gamma_old=gamma; |
159 |
oldeps = epsln; |
gamma = sqrt(dp); |
160 |
delta = cs * dbar + sn * alfa ; |
/* QR factorisation */ |
161 |
gbar = sn * dbar - cs * alfa ; |
|
162 |
epsln = sn * beta ; |
c_ancient = c_old; c_old = c; |
163 |
dbar = - cs * beta; |
s_ancient = s_old; s_old = s; |
164 |
|
|
165 |
root = sqrt(gbar*gbar+dbar*dbar) ; |
alpha_0 = c_old * delta - c_ancient * s_old * gamma_old; |
166 |
Arnorm = phibar*root; |
alpha_1 = sqrt(alpha_0*alpha_0 + gamma*gamma); |
167 |
|
alpha_2 = s_old * delta + c_ancient * c_old * gamma_old; |
168 |
gamma = sqrt(gbar*gbar+beta*beta) ; |
alpha_3 = s_ancient * gamma_old; |
169 |
gamma = MAX(gamma,eps) ; |
|
170 |
cs = gbar / gamma ; |
/* Givens rotation */ |
171 |
sn = beta / gamma ; |
|
172 |
phi = cs * phibar ; |
c = alpha_0 / alpha_1; |
173 |
phibar = sn * phibar ; |
s = gamma / alpha_1; |
174 |
|
|
175 |
/* Update x. */ |
rnorm_prec = rnorm_prec * s; |
176 |
|
|
177 |
denom = 1/gamma ; |
/* w_new <- (z-alpha_3 w - alpha_2 w_old)/alpha_1 */ |
178 |
Paso_Copy(n,w1,w2); |
|
179 |
Paso_Copy(n,w2,w); |
if (num_iter>1) Paso_Copy(n, W_ancient, W_old); /* w__ancient <- w_old */ |
180 |
|
if (num_iter>0) Paso_Copy(n, W_old, W); /* w_old <- w */ |
181 |
Paso_LinearCombination(n,w,denom,v,-(denom*oldeps),w1); |
|
182 |
Paso_Update(n, 1., w, -(delta*denom), w2) ; |
Paso_Copy(n, W, Z); |
183 |
Paso_Update(n, 1., x, phi, w) ; |
if (num_iter>1) Paso_AXPY(n, W,- alpha_3,W_ancient); /* w <- w - alpha_3 w__ancient */ |
184 |
|
if (num_iter>0) Paso_AXPY(n, W,- alpha_2,W_old); /* w <- w - alpha_2 w_old */ |
185 |
/* Go round again. */ |
Paso_Scale(n, W, 1.0 / alpha_1); /* w <- w / alpha_1 */ |
186 |
|
/* */ |
187 |
gmax = MAX(gmax,gamma); |
Paso_AXPY(n, X,c * eta,W); /* x <- x + c eta w */ |
188 |
gmin = MIN(gmin,gamma); |
eta = - s * eta; |
189 |
z = rhs1 / gamma; |
convergeFlag = rnorm_prec <= tol; |
190 |
ynorm2 = z*z + ynorm2; |
} else { |
191 |
rhs1 = rhs2 - delta*z; |
status=SOLVER_BREAKDOWN; |
192 |
rhs2 = - epsln*z; |
} |
193 |
|
Paso_Copy(n, Z, ZNEW); |
194 |
Anorm = sqrt( tnorm2 ) ; |
++(num_iter); |
195 |
ynorm = sqrt( ynorm2 ) ; |
if ( !convergeFlag && (num_iter>=maxit)) status = SOLVER_MAXITER_REACHED; |
196 |
|
} |
197 |
rnorm = phibar; |
TMPMEMFREE(Z); |
198 |
epsx = Anorm*ynorm*eps; |
TMPMEMFREE(ZNEW); |
199 |
|
TMPMEMFREE(AZ); |
200 |
|
TMPMEMFREE(R_old); |
201 |
if (status==SOLVER_NO_ERROR) { |
TMPMEMFREE(R_ancient); |
202 |
maxIterFlag = (num_iter > maxit); |
TMPMEMFREE(W); |
203 |
norm_of_residual=rnorm; |
TMPMEMFREE(W_old); |
204 |
convergeFlag=((norm_of_residual/(Anorm*ynorm))<(*tolerance) || 1+(norm_of_residual/(Anorm*ynorm)) <=1); |
TMPMEMFREE(W_ancient); |
205 |
if (maxIterFlag) { |
|
206 |
status = SOLVER_MAXITER_REACHED; |
*iter=num_iter; |
207 |
} else if (breakFlag) { |
*tolerance=rnorm_prec/norm_scal; |
208 |
status = SOLVER_BREAKDOWN; |
|
209 |
} |
/* End of MINRES */ |
210 |
} |
return status; |
|
++(num_iter); |
|
|
} |
|
|
/* end of iteration */ |
|
|
|
|
|
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
|
|
TMPMEMFREE(w); |
|
|
TMPMEMFREE(w1); |
|
|
TMPMEMFREE(w2); |
|
|
TMPMEMFREE(r1); |
|
|
TMPMEMFREE(r2); |
|
|
TMPMEMFREE(y); |
|
|
TMPMEMFREE(v); |
|
|
|
|
|
*iter=num_iter; |
|
|
*tolerance=norm_of_residual; |
|
|
|
|
|
/* End of MINRES */ |
|
|
return status; |
|
211 |
} |
} |