1 |
jgs |
150 |
/* $Id$ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
dhawcroft |
631 |
******************************************************************************** |
5 |
dhawcroft |
633 |
* Copyright 2006 by ACcESS MNRF * |
6 |
dhawcroft |
631 |
* * |
7 |
|
|
* http://www.access.edu.au * |
8 |
|
|
* Primary Business: Queensland, Australia * |
9 |
|
|
* Licensed under the Open Software License version 3.0 * |
10 |
|
|
* http://www.opensource.org/licenses/osl-3.0.php * |
11 |
|
|
******************************************************************************** |
12 |
|
|
*/ |
13 |
|
|
|
14 |
|
|
/* |
15 |
jgs |
150 |
Crude modifications and translations for Paso by Matt Davies and Lutz Gross |
16 |
|
|
*/ |
17 |
|
|
|
18 |
gross |
700 |
#include "Paso.h" |
19 |
|
|
#include "SystemMatrix.h" |
20 |
jgs |
150 |
#include "Solver.h" |
21 |
|
|
#ifdef _OPENMP |
22 |
|
|
#include <omp.h> |
23 |
|
|
#endif |
24 |
|
|
|
25 |
|
|
/* -- Iterative template routine -- |
26 |
|
|
* Univ. of Tennessee and Oak Ridge National Laboratory |
27 |
|
|
* October 1, 1993 |
28 |
|
|
* Details of this algorithm are described in "Templates for the |
29 |
|
|
* Solution of Linear Systems: Building Blocks for Iterative |
30 |
|
|
* Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra, |
31 |
|
|
* Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications, |
32 |
|
|
* 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps). |
33 |
|
|
* |
34 |
|
|
* Purpose |
35 |
|
|
* ======= |
36 |
|
|
* |
37 |
|
|
* BICGSTAB solves the linear system A*x = b using the |
38 |
|
|
* BiConjugate Gradient Stabilized iterative method with |
39 |
|
|
* preconditioning. |
40 |
|
|
* |
41 |
|
|
* Convergence test: norm( b - A*x )< TOL. |
42 |
|
|
* For other measures, see the above reference. |
43 |
|
|
* |
44 |
|
|
* Arguments |
45 |
|
|
* ========= |
46 |
|
|
* |
47 |
|
|
* A (input) |
48 |
|
|
* |
49 |
|
|
* R (input) DOUBLE PRECISION array, dimension N. |
50 |
|
|
* On entry, residual of inital guess X |
51 |
|
|
* |
52 |
|
|
* X (input/output) DOUBLE PRECISION array, dimension N. |
53 |
|
|
* On input, the initial guess. |
54 |
|
|
* |
55 |
|
|
* ITER (input/output) INT |
56 |
|
|
* On input, the maximum iterations to be performed. |
57 |
|
|
* On output, actual number of iterations performed. |
58 |
|
|
* |
59 |
|
|
* RESID (input/output) DOUBLE PRECISION |
60 |
|
|
* On input, the allowable convergence measure for |
61 |
|
|
* norm( b - A*x ) |
62 |
|
|
* On output, the final value of this measure. |
63 |
|
|
* |
64 |
|
|
* return value |
65 |
|
|
* |
66 |
|
|
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
67 |
|
|
* = SOLVER_MAXITER_REACHED |
68 |
|
|
* = SOLVER_INPUT_ERROR Illegal parameter: |
69 |
|
|
* = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller |
70 |
|
|
* = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller |
71 |
|
|
* |
72 |
|
|
* ============================================================== |
73 |
|
|
*/ |
74 |
|
|
|
75 |
|
|
err_t Paso_Solver_BiCGStab( |
76 |
|
|
Paso_SystemMatrix * A, |
77 |
|
|
double * r, |
78 |
|
|
double * x, |
79 |
|
|
dim_t *iter, |
80 |
gross |
584 |
double * tolerance, |
81 |
|
|
Paso_Performance* pp) { |
82 |
jgs |
150 |
|
83 |
|
|
|
84 |
|
|
/* Local variables */ |
85 |
|
|
double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL; |
86 |
|
|
double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global; |
87 |
|
|
double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1; |
88 |
|
|
dim_t num_iter=0,maxit,num_iter_global; |
89 |
|
|
dim_t i0; |
90 |
|
|
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
91 |
|
|
dim_t status = SOLVER_NO_ERROR; |
92 |
|
|
|
93 |
|
|
/* adapt original routine parameters */ |
94 |
|
|
dim_t n = A->num_cols * A-> col_block_size;; |
95 |
|
|
double * resid = tolerance; |
96 |
|
|
|
97 |
|
|
/* Executable Statements */ |
98 |
|
|
|
99 |
|
|
/* allocate memory: */ |
100 |
|
|
rtld=TMPMEMALLOC(n,double); |
101 |
|
|
p=TMPMEMALLOC(n,double); |
102 |
|
|
v=TMPMEMALLOC(n,double); |
103 |
|
|
t=TMPMEMALLOC(n,double); |
104 |
|
|
phat=TMPMEMALLOC(n,double); |
105 |
|
|
shat=TMPMEMALLOC(n,double); |
106 |
|
|
s=TMPMEMALLOC(n,double); |
107 |
|
|
|
108 |
|
|
/* Test the input parameters. */ |
109 |
|
|
|
110 |
|
|
if (n < 0) { |
111 |
|
|
status = SOLVER_INPUT_ERROR; |
112 |
|
|
} else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) { |
113 |
|
|
status = SOLVER_MEMORY_ERROR; |
114 |
|
|
} else { |
115 |
|
|
|
116 |
|
|
/* now bicgstab starts : */ |
117 |
|
|
maxit = *iter; |
118 |
|
|
tol = *resid; |
119 |
|
|
|
120 |
gross |
929 |
#pragma omp parallel firstprivate(maxit,tol) \ |
121 |
|
|
private(rho,omega,num_iter,norm_of_residual,beta,alpha,rho1, convergeFlag,maxIterFlag,breakFlag) |
122 |
jgs |
150 |
{ |
123 |
|
|
num_iter =0; |
124 |
gross |
929 |
convergeFlag=FALSE; |
125 |
|
|
maxIterFlag=FALSE; |
126 |
|
|
breakFlag=FALSE; |
127 |
jgs |
150 |
|
128 |
|
|
/* initialize arrays */ |
129 |
|
|
|
130 |
|
|
#pragma omp for private(i0) schedule(static) |
131 |
|
|
for (i0 = 0; i0 < n; i0++) { |
132 |
|
|
rtld[i0]=0; |
133 |
|
|
p[i0]=0; |
134 |
|
|
v[i0]=0; |
135 |
|
|
t[i0]=0; |
136 |
|
|
phat[i0]=0; |
137 |
|
|
shat[i0]=0; |
138 |
|
|
} |
139 |
|
|
#pragma omp for private(i0) schedule(static) |
140 |
|
|
for (i0 = 0; i0 < n; i0++) rtld[i0] = r[i0]; |
141 |
|
|
|
142 |
|
|
/* Perform BiConjugate Gradient Stabilized iteration. */ |
143 |
|
|
|
144 |
|
|
L10: |
145 |
|
|
++(num_iter); |
146 |
|
|
#pragma omp barrier |
147 |
|
|
#pragma omp master |
148 |
|
|
{ |
149 |
|
|
sum_1 = 0; |
150 |
|
|
sum_2 = 0; |
151 |
|
|
sum_3 = 0; |
152 |
|
|
sum_4 = 0; |
153 |
|
|
omegaNumtr = 0.0; |
154 |
|
|
omegaDenumtr = 0.0; |
155 |
|
|
} |
156 |
|
|
#pragma omp barrier |
157 |
|
|
#pragma omp for private(i0) reduction(+:sum_1) schedule(static) |
158 |
|
|
for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0]; |
159 |
|
|
rho = sum_1; |
160 |
|
|
|
161 |
|
|
if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) { |
162 |
|
|
/* Compute vector P. */ |
163 |
|
|
|
164 |
|
|
if (num_iter > 1) { |
165 |
|
|
beta = rho / rho1 * (alpha / omega); |
166 |
|
|
#pragma omp for private(i0) schedule(static) |
167 |
|
|
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]); |
168 |
|
|
} else { |
169 |
|
|
#pragma omp for private(i0) schedule(static) |
170 |
|
|
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0]; |
171 |
|
|
} |
172 |
|
|
|
173 |
|
|
/* Compute direction adjusting vector PHAT and scalar ALPHA. */ |
174 |
|
|
|
175 |
|
|
Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]); |
176 |
gross |
415 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]); |
177 |
jgs |
150 |
|
178 |
|
|
#pragma omp for private(i0) reduction(+:sum_2) schedule(static) |
179 |
|
|
for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0]; |
180 |
|
|
if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) { |
181 |
|
|
alpha = rho / sum_2; |
182 |
|
|
|
183 |
|
|
#pragma omp for private(i0) reduction(+:sum_3) schedule(static) |
184 |
|
|
for (i0 = 0; i0 < n; i0++) { |
185 |
|
|
r[i0] -= alpha * v[i0]; |
186 |
|
|
s[i0] = r[i0]; |
187 |
|
|
sum_3 += s[i0] * s[i0]; |
188 |
|
|
} |
189 |
|
|
norm_of_residual = sqrt(sum_3); |
190 |
|
|
|
191 |
|
|
/* Early check for tolerance. */ |
192 |
|
|
if ( (convergeFlag = (norm_of_residual <= tol)) ) { |
193 |
|
|
#pragma omp for private(i0) schedule(static) |
194 |
|
|
for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0]; |
195 |
|
|
maxIterFlag = FALSE; |
196 |
|
|
breakFlag = FALSE; |
197 |
|
|
} else { |
198 |
|
|
/* Compute stabilizer vector SHAT and scalar OMEGA. */ |
199 |
|
|
Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]); |
200 |
gross |
415 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]); |
201 |
jgs |
150 |
|
202 |
|
|
#pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static) |
203 |
|
|
for (i0 = 0; i0 < n; i0++) { |
204 |
|
|
omegaNumtr +=t[i0] * s[i0]; |
205 |
|
|
omegaDenumtr += t[i0] * t[i0]; |
206 |
|
|
} |
207 |
|
|
if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) { |
208 |
|
|
omega = omegaNumtr / omegaDenumtr; |
209 |
|
|
|
210 |
|
|
#pragma omp for private(i0) reduction(+:sum_4) schedule(static) |
211 |
|
|
for (i0 = 0; i0 < n; i0++) { |
212 |
|
|
x[i0] += alpha * phat[i0] + omega * shat[i0]; |
213 |
|
|
r[i0] -= omega * t[i0]; |
214 |
|
|
sum_4 += r[i0] * r[i0]; |
215 |
|
|
} |
216 |
|
|
norm_of_residual = sqrt(sum_4); |
217 |
|
|
convergeFlag = norm_of_residual <= tol; |
218 |
|
|
maxIterFlag = num_iter == maxit; |
219 |
|
|
breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS); |
220 |
|
|
} |
221 |
|
|
} |
222 |
|
|
} |
223 |
|
|
if (!(convergeFlag || maxIterFlag || breakFlag)) { |
224 |
|
|
rho1 = rho; |
225 |
|
|
goto L10; |
226 |
|
|
} |
227 |
|
|
} |
228 |
|
|
/* end of iteration */ |
229 |
|
|
#pragma omp master |
230 |
|
|
{ |
231 |
|
|
num_iter_global=num_iter; |
232 |
|
|
norm_of_residual_global=norm_of_residual; |
233 |
|
|
if (maxIterFlag) { |
234 |
|
|
status = SOLVER_MAXITER_REACHED; |
235 |
|
|
} else if (breakFlag) { |
236 |
|
|
status = SOLVER_BREAKDOWN; |
237 |
|
|
} |
238 |
|
|
} |
239 |
|
|
} /* end of parallel region */ |
240 |
|
|
} |
241 |
|
|
TMPMEMFREE(rtld); |
242 |
|
|
TMPMEMFREE(p); |
243 |
|
|
TMPMEMFREE(v); |
244 |
|
|
TMPMEMFREE(t); |
245 |
|
|
TMPMEMFREE(phat); |
246 |
|
|
TMPMEMFREE(shat); |
247 |
|
|
TMPMEMFREE(s); |
248 |
|
|
*iter=num_iter_global; |
249 |
|
|
*resid=norm_of_residual_global; |
250 |
|
|
|
251 |
|
|
/* End of BICGSTAB */ |
252 |
|
|
return status; |
253 |
|
|
} |
254 |
|
|
/* |
255 |
|
|
* $Log$ |
256 |
|
|
* Revision 1.2 2005/09/15 03:44:40 jgs |
257 |
|
|
* Merge of development branch dev-02 back to main trunk on 2005-09-15 |
258 |
|
|
* |
259 |
|
|
* Revision 1.1.2.1 2005/09/05 06:29:49 gross |
260 |
|
|
* These files have been extracted from finley to define a stand alone libray for iterative |
261 |
|
|
* linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but |
262 |
|
|
* has not been tested yet. |
263 |
|
|
* |
264 |
|
|
* |
265 |
|
|
*/ |