/[escript]/branches/doubleplusgood/paso/src/BiCGStab.cpp
ViewVC logotype

Contents of /branches/doubleplusgood/paso/src/BiCGStab.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4345 - (show annotations)
Fri Mar 29 07:09:41 2013 UTC (6 years, 8 months ago) by jfenwick
File size: 8338 byte(s)
Spelling fixes
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /*
18 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
19 */
20
21 #include "Paso.h"
22 #include "SystemMatrix.h"
23 #include "Solver.h"
24 #ifdef _OPENMP
25 #include <omp.h>
26 #endif
27
28 /* -- Iterative template routine --
29 * Univ. of Tennessee and Oak Ridge National Laboratory
30 * October 1, 1993
31 * Details of this algorithm are described in "Templates for the
32 * Solution of Linear Systems: Building Blocks for Iterative
33 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
34 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
35 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
36 *
37 * Purpose
38 * =======
39 *
40 * BICGSTAB solves the linear system A*x = b using the
41 * BiConjugate Gradient Stabilized iterative method with
42 * preconditioning.
43 *
44 * Convergence test: norm( b - A*x )< TOL.
45 * For other measures, see the above reference.
46 *
47 * Arguments
48 * =========
49 *
50 * A (input)
51 *
52 * R (input) DOUBLE PRECISION array, dimension N.
53 * On entry, residual of initial guess X
54 *
55 * X (input/output) DOUBLE PRECISION array, dimension N.
56 * On input, the initial guess.
57 *
58 * ITER (input/output) INT
59 * On input, the maximum iterations to be performed.
60 * On output, actual number of iterations performed.
61 *
62 * RESID (input/output) DOUBLE PRECISION
63 * On input, the allowable convergence measure for
64 * norm( b - A*x )
65 * On output, the final value of this measure.
66 *
67 * return value
68 *
69 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
70 * = SOLVER_MAXITER_REACHED
71 * = SOLVER_INPUT_ERROR Illegal parameter:
72 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
73 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
74 *
75 * ==============================================================
76 */
77
78 err_t Paso_Solver_BiCGStab(
79 Paso_SystemMatrix * A,
80 double * r,
81 double * x,
82 dim_t *iter,
83 double * tolerance,
84 Paso_Performance* pp) {
85
86
87 /* Local variables */
88 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/
89 double beta,norm_of_residual=0,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global=0;
90 double alpha=0, omega=0, omegaNumtr, omegaDenumtr, rho, tol, rho1=0;
91 #ifdef ESYS_MPI
92 double loc_sum[2], sum[2];
93 #endif
94 dim_t num_iter=0,maxit,num_iter_global=0;
95 dim_t i0;
96 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
97 dim_t status = SOLVER_NO_ERROR;
98 double *resid = tolerance;
99 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
100
101 /* Executable Statements */
102
103 /* allocate memory: */
104 rtld=new double[n];
105 p=new double[n];
106 v=new double[n];
107 t=new double[n];
108 phat=new double[n];
109 shat=new double[n];
110 s=new double[n];
111 /* Test the input parameters. */
112
113 if (n < 0) {
114 status = SOLVER_INPUT_ERROR;
115 } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
116 status = SOLVER_MEMORY_ERROR;
117 } else {
118
119 /* now bicgstab starts : */
120 maxit = *iter;
121 tol = *resid;
122
123 num_iter =0;
124 convergeFlag=FALSE;
125 maxIterFlag=FALSE;
126 breakFlag=FALSE;
127
128 /* initialise arrays */
129
130 #pragma omp parallel for private(i0) schedule(static)
131 for (i0 = 0; i0 < n; i0++) {
132 rtld[i0]=0;
133 p[i0]=0;
134 v[i0]=0;
135 t[i0]=0;
136 phat[i0]=0;
137 shat[i0]=0;
138 rtld[i0] = r[i0];
139 }
140
141 /* Perform BiConjugate Gradient Stabilized iteration. */
142
143 L10:
144 ++(num_iter);
145 sum_1 = 0;
146 sum_2 = 0;
147 sum_3 = 0;
148 sum_4 = 0;
149 omegaNumtr = 0.0;
150 omegaDenumtr = 0.0;
151 #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
152 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
153 #ifdef ESYS_MPI
154 loc_sum[0] = sum_1;
155 MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
156 #endif
157 rho = sum_1;
158
159 if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
160 /* Compute vector P. */
161
162 if (num_iter > 1) {
163 beta = rho / rho1 * (alpha / omega);
164 #pragma omp parallel for private(i0) schedule(static)
165 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
166 } else {
167 #pragma omp parallel for private(i0) schedule(static)
168 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
169 }
170
171 /* Compute direction adjusting vector PHAT and scalar ALPHA. */
172
173 Paso_SystemMatrix_solvePreconditioner(A,&phat[0], &p[0]);
174 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0],PASO_ZERO, &v[0]);
175
176 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
177 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
178 #ifdef ESYS_MPI
179 loc_sum[0] = sum_2;
180 MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
181 #endif
182 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
183 alpha = rho / sum_2;
184
185 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
186 for (i0 = 0; i0 < n; i0++) {
187 r[i0] -= alpha * v[i0];
188 s[i0] = r[i0];
189 sum_3 += s[i0] * s[i0];
190 }
191 #ifdef ESYS_MPI
192 loc_sum[0] = sum_3;
193 MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
194 #endif
195 norm_of_residual = sqrt(sum_3);
196
197 /* Early check for tolerance. */
198 if ( (convergeFlag = (norm_of_residual <= tol)) ) {
199 #pragma omp parallel for private(i0) schedule(static)
200 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
201 maxIterFlag = FALSE;
202 breakFlag = FALSE;
203 } else {
204 /* Compute stabilizer vector SHAT and scalar OMEGA. */
205 Paso_SystemMatrix_solvePreconditioner(A,&shat[0], &s[0]);
206 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]);
207
208 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
209 for (i0 = 0; i0 < n; i0++) {
210 omegaNumtr +=t[i0] * s[i0];
211 omegaDenumtr += t[i0] * t[i0];
212 }
213 #ifdef ESYS_MPI
214 loc_sum[0] = omegaNumtr;
215 loc_sum[1] = omegaDenumtr;
216 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
217 omegaNumtr=sum[0];
218 omegaDenumtr=sum[1];
219 #endif
220 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
221 omega = omegaNumtr / omegaDenumtr;
222
223 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
224 for (i0 = 0; i0 < n; i0++) {
225 x[i0] += alpha * phat[i0] + omega * shat[i0];
226 r[i0] = s[i0]-omega * t[i0];
227 sum_4 += r[i0] * r[i0];
228 }
229 #ifdef ESYS_MPI
230 loc_sum[0] = sum_4;
231 MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
232 #endif
233 norm_of_residual = sqrt(sum_4);
234 convergeFlag = norm_of_residual <= tol;
235 maxIterFlag = num_iter > maxit;
236 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
237 }
238 }
239 }
240 if (!(convergeFlag || maxIterFlag || breakFlag)) {
241 rho1 = rho;
242 goto L10;
243 }
244 }
245 /* end of iteration */
246 num_iter_global=num_iter;
247 norm_of_residual_global=norm_of_residual;
248 if (maxIterFlag) {
249 status = SOLVER_MAXITER_REACHED;
250 } else if (breakFlag) {
251 status = SOLVER_BREAKDOWN;
252 }
253 }
254 delete[] rtld;
255 delete[] p;
256 delete[] v;
257 delete[] t;
258 delete[] phat;
259 delete[] shat;
260 delete[] s;
261 *iter=num_iter_global;
262 *resid=norm_of_residual_global;
263
264 /* End of BICGSTAB */
265 return status;
266 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26