1 |
|
2 |
/***************************************************************************** |
3 |
* |
4 |
* Copyright (c) 2003-2013 by University of Queensland |
5 |
* http://www.uq.edu.au |
6 |
* |
7 |
* Primary Business: Queensland, Australia |
8 |
* Licensed under the Open Software License version 3.0 |
9 |
* http://www.opensource.org/licenses/osl-3.0.php |
10 |
* |
11 |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
* Development since 2012 by School of Earth Sciences |
13 |
* |
14 |
*****************************************************************************/ |
15 |
|
16 |
|
17 |
/* |
18 |
Crude modifications and translations for Paso by Matt Davies and Lutz Gross |
19 |
*/ |
20 |
|
21 |
#include "Paso.h" |
22 |
#include "SystemMatrix.h" |
23 |
#include "Solver.h" |
24 |
#ifdef _OPENMP |
25 |
#include <omp.h> |
26 |
#endif |
27 |
|
28 |
/* -- Iterative template routine -- |
29 |
* Univ. of Tennessee and Oak Ridge National Laboratory |
30 |
* October 1, 1993 |
31 |
* Details of this algorithm are described in "Templates for the |
32 |
* Solution of Linear Systems: Building Blocks for Iterative |
33 |
* Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra, |
34 |
* Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications, |
35 |
* 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps). |
36 |
* |
37 |
* Purpose |
38 |
* ======= |
39 |
* |
40 |
* BICGSTAB solves the linear system A*x = b using the |
41 |
* BiConjugate Gradient Stabilized iterative method with |
42 |
* preconditioning. |
43 |
* |
44 |
* Convergence test: norm( b - A*x )< TOL. |
45 |
* For other measures, see the above reference. |
46 |
* |
47 |
* Arguments |
48 |
* ========= |
49 |
* |
50 |
* A (input) |
51 |
* |
52 |
* R (input) DOUBLE PRECISION array, dimension N. |
53 |
* On entry, residual of initial guess X |
54 |
* |
55 |
* X (input/output) DOUBLE PRECISION array, dimension N. |
56 |
* On input, the initial guess. |
57 |
* |
58 |
* ITER (input/output) INT |
59 |
* On input, the maximum iterations to be performed. |
60 |
* On output, actual number of iterations performed. |
61 |
* |
62 |
* RESID (input/output) DOUBLE PRECISION |
63 |
* On input, the allowable convergence measure for |
64 |
* norm( b - A*x ) |
65 |
* On output, the final value of this measure. |
66 |
* |
67 |
* return value |
68 |
* |
69 |
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
70 |
* = SOLVER_MAXITER_REACHED |
71 |
* = SOLVER_INPUT_ERROR Illegal parameter: |
72 |
* = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller |
73 |
* = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller |
74 |
* |
75 |
* ============================================================== |
76 |
*/ |
77 |
|
78 |
err_t Paso_Solver_BiCGStab( |
79 |
Paso_SystemMatrix * A, |
80 |
double * r, |
81 |
double * x, |
82 |
dim_t *iter, |
83 |
double * tolerance, |
84 |
Paso_Performance* pp) { |
85 |
|
86 |
|
87 |
/* Local variables */ |
88 |
double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/ |
89 |
double beta,norm_of_residual=0,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global=0; |
90 |
double alpha=0, omega=0, omegaNumtr, omegaDenumtr, rho, tol, rho1=0; |
91 |
#ifdef ESYS_MPI |
92 |
double loc_sum[2], sum[2]; |
93 |
#endif |
94 |
dim_t num_iter=0,maxit,num_iter_global=0; |
95 |
dim_t i0; |
96 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
97 |
dim_t status = SOLVER_NO_ERROR; |
98 |
double *resid = tolerance; |
99 |
dim_t n = Paso_SystemMatrix_getTotalNumRows(A); |
100 |
|
101 |
/* Executable Statements */ |
102 |
|
103 |
/* allocate memory: */ |
104 |
rtld=new double[n]; |
105 |
p=new double[n]; |
106 |
v=new double[n]; |
107 |
t=new double[n]; |
108 |
phat=new double[n]; |
109 |
shat=new double[n]; |
110 |
s=new double[n]; |
111 |
/* Test the input parameters. */ |
112 |
|
113 |
if (n < 0) { |
114 |
status = SOLVER_INPUT_ERROR; |
115 |
} else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) { |
116 |
status = SOLVER_MEMORY_ERROR; |
117 |
} else { |
118 |
|
119 |
/* now bicgstab starts : */ |
120 |
maxit = *iter; |
121 |
tol = *resid; |
122 |
|
123 |
num_iter =0; |
124 |
convergeFlag=FALSE; |
125 |
maxIterFlag=FALSE; |
126 |
breakFlag=FALSE; |
127 |
|
128 |
/* initialise arrays */ |
129 |
|
130 |
#pragma omp parallel for private(i0) schedule(static) |
131 |
for (i0 = 0; i0 < n; i0++) { |
132 |
rtld[i0]=0; |
133 |
p[i0]=0; |
134 |
v[i0]=0; |
135 |
t[i0]=0; |
136 |
phat[i0]=0; |
137 |
shat[i0]=0; |
138 |
rtld[i0] = r[i0]; |
139 |
} |
140 |
|
141 |
/* Perform BiConjugate Gradient Stabilized iteration. */ |
142 |
|
143 |
L10: |
144 |
++(num_iter); |
145 |
sum_1 = 0; |
146 |
sum_2 = 0; |
147 |
sum_3 = 0; |
148 |
sum_4 = 0; |
149 |
omegaNumtr = 0.0; |
150 |
omegaDenumtr = 0.0; |
151 |
#pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static) |
152 |
for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0]; |
153 |
#ifdef ESYS_MPI |
154 |
loc_sum[0] = sum_1; |
155 |
MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
156 |
#endif |
157 |
rho = sum_1; |
158 |
|
159 |
if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) { |
160 |
/* Compute vector P. */ |
161 |
|
162 |
if (num_iter > 1) { |
163 |
beta = rho / rho1 * (alpha / omega); |
164 |
#pragma omp parallel for private(i0) schedule(static) |
165 |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]); |
166 |
} else { |
167 |
#pragma omp parallel for private(i0) schedule(static) |
168 |
for (i0 = 0; i0 < n; i0++) p[i0] = r[i0]; |
169 |
} |
170 |
|
171 |
/* Compute direction adjusting vector PHAT and scalar ALPHA. */ |
172 |
|
173 |
Paso_SystemMatrix_solvePreconditioner(A,&phat[0], &p[0]); |
174 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0],PASO_ZERO, &v[0]); |
175 |
|
176 |
#pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static) |
177 |
for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0]; |
178 |
#ifdef ESYS_MPI |
179 |
loc_sum[0] = sum_2; |
180 |
MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
181 |
#endif |
182 |
if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) { |
183 |
alpha = rho / sum_2; |
184 |
|
185 |
#pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static) |
186 |
for (i0 = 0; i0 < n; i0++) { |
187 |
r[i0] -= alpha * v[i0]; |
188 |
s[i0] = r[i0]; |
189 |
sum_3 += s[i0] * s[i0]; |
190 |
} |
191 |
#ifdef ESYS_MPI |
192 |
loc_sum[0] = sum_3; |
193 |
MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
194 |
#endif |
195 |
norm_of_residual = sqrt(sum_3); |
196 |
|
197 |
/* Early check for tolerance. */ |
198 |
if ( (convergeFlag = (norm_of_residual <= tol)) ) { |
199 |
#pragma omp parallel for private(i0) schedule(static) |
200 |
for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0]; |
201 |
maxIterFlag = FALSE; |
202 |
breakFlag = FALSE; |
203 |
} else { |
204 |
/* Compute stabilizer vector SHAT and scalar OMEGA. */ |
205 |
Paso_SystemMatrix_solvePreconditioner(A,&shat[0], &s[0]); |
206 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]); |
207 |
|
208 |
#pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static) |
209 |
for (i0 = 0; i0 < n; i0++) { |
210 |
omegaNumtr +=t[i0] * s[i0]; |
211 |
omegaDenumtr += t[i0] * t[i0]; |
212 |
} |
213 |
#ifdef ESYS_MPI |
214 |
loc_sum[0] = omegaNumtr; |
215 |
loc_sum[1] = omegaDenumtr; |
216 |
MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
217 |
omegaNumtr=sum[0]; |
218 |
omegaDenumtr=sum[1]; |
219 |
#endif |
220 |
if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) { |
221 |
omega = omegaNumtr / omegaDenumtr; |
222 |
|
223 |
#pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static) |
224 |
for (i0 = 0; i0 < n; i0++) { |
225 |
x[i0] += alpha * phat[i0] + omega * shat[i0]; |
226 |
r[i0] = s[i0]-omega * t[i0]; |
227 |
sum_4 += r[i0] * r[i0]; |
228 |
} |
229 |
#ifdef ESYS_MPI |
230 |
loc_sum[0] = sum_4; |
231 |
MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
232 |
#endif |
233 |
norm_of_residual = sqrt(sum_4); |
234 |
convergeFlag = norm_of_residual <= tol; |
235 |
maxIterFlag = num_iter > maxit; |
236 |
breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS); |
237 |
} |
238 |
} |
239 |
} |
240 |
if (!(convergeFlag || maxIterFlag || breakFlag)) { |
241 |
rho1 = rho; |
242 |
goto L10; |
243 |
} |
244 |
} |
245 |
/* end of iteration */ |
246 |
num_iter_global=num_iter; |
247 |
norm_of_residual_global=norm_of_residual; |
248 |
if (maxIterFlag) { |
249 |
status = SOLVER_MAXITER_REACHED; |
250 |
} else if (breakFlag) { |
251 |
status = SOLVER_BREAKDOWN; |
252 |
} |
253 |
} |
254 |
delete[] rtld; |
255 |
delete[] p; |
256 |
delete[] v; |
257 |
delete[] t; |
258 |
delete[] phat; |
259 |
delete[] shat; |
260 |
delete[] s; |
261 |
*iter=num_iter_global; |
262 |
*resid=norm_of_residual_global; |
263 |
|
264 |
/* End of BICGSTAB */ |
265 |
return status; |
266 |
} |