/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Contents of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1986 - (show annotations)
Thu Nov 6 23:33:14 2008 UTC (10 years, 9 months ago) by jfenwick
File MIME type: text/plain
File size: 8298 byte(s)
Warning removal
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /*
16 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
17 */
18
19 #include "Paso.h"
20 #include "SystemMatrix.h"
21 #include "Solver.h"
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25
26 /* -- Iterative template routine --
27 * Univ. of Tennessee and Oak Ridge National Laboratory
28 * October 1, 1993
29 * Details of this algorithm are described in "Templates for the
30 * Solution of Linear Systems: Building Blocks for Iterative
31 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
32 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
33 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
34 *
35 * Purpose
36 * =======
37 *
38 * BICGSTAB solves the linear system A*x = b using the
39 * BiConjugate Gradient Stabilized iterative method with
40 * preconditioning.
41 *
42 * Convergence test: norm( b - A*x )< TOL.
43 * For other measures, see the above reference.
44 *
45 * Arguments
46 * =========
47 *
48 * A (input)
49 *
50 * R (input) DOUBLE PRECISION array, dimension N.
51 * On entry, residual of inital guess X
52 *
53 * X (input/output) DOUBLE PRECISION array, dimension N.
54 * On input, the initial guess.
55 *
56 * ITER (input/output) INT
57 * On input, the maximum iterations to be performed.
58 * On output, actual number of iterations performed.
59 *
60 * RESID (input/output) DOUBLE PRECISION
61 * On input, the allowable convergence measure for
62 * norm( b - A*x )
63 * On output, the final value of this measure.
64 *
65 * return value
66 *
67 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
68 * = SOLVER_MAXITER_REACHED
69 * = SOLVER_INPUT_ERROR Illegal parameter:
70 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
71 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
72 *
73 * ==============================================================
74 */
75
76 err_t Paso_Solver_BiCGStab(
77 Paso_SystemMatrix * A,
78 double * r,
79 double * x,
80 dim_t *iter,
81 double * tolerance,
82 Paso_Performance* pp) {
83
84
85 /* Local variables */
86 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/
87 double beta,norm_of_residual=0,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global=0;
88 double alpha=0, omega=0, omegaNumtr, omegaDenumtr, rho, tol, rho1=0;
89 #ifdef PASO_MPI
90 double loc_sum[2], sum[2];
91 #endif
92 dim_t num_iter=0,maxit,num_iter_global=0;
93 dim_t i0;
94 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
95 dim_t status = SOLVER_NO_ERROR;
96 double *resid = tolerance;
97 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
98
99 /* Executable Statements */
100
101 /* allocate memory: */
102 rtld=TMPMEMALLOC(n,double);
103 p=TMPMEMALLOC(n,double);
104 v=TMPMEMALLOC(n,double);
105 t=TMPMEMALLOC(n,double);
106 phat=TMPMEMALLOC(n,double);
107 shat=TMPMEMALLOC(n,double);
108 s=TMPMEMALLOC(n,double);
109 /* Test the input parameters. */
110
111 if (n < 0) {
112 status = SOLVER_INPUT_ERROR;
113 } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
114 status = SOLVER_MEMORY_ERROR;
115 } else {
116
117 /* now bicgstab starts : */
118 maxit = *iter;
119 tol = *resid;
120
121 num_iter =0;
122 convergeFlag=FALSE;
123 maxIterFlag=FALSE;
124 breakFlag=FALSE;
125
126 /* initialize arrays */
127
128 #pragma omp parallel for private(i0) schedule(static)
129 for (i0 = 0; i0 < n; i0++) {
130 rtld[i0]=0;
131 p[i0]=0;
132 v[i0]=0;
133 t[i0]=0;
134 phat[i0]=0;
135 shat[i0]=0;
136 rtld[i0] = r[i0];
137 }
138
139 /* Perform BiConjugate Gradient Stabilized iteration. */
140
141 L10:
142 ++(num_iter);
143 sum_1 = 0;
144 sum_2 = 0;
145 sum_3 = 0;
146 sum_4 = 0;
147 omegaNumtr = 0.0;
148 omegaDenumtr = 0.0;
149 #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
150 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
151 #ifdef PASO_MPI
152 loc_sum[0] = sum_1;
153 MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
154 #endif
155 rho = sum_1;
156
157 if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
158 /* Compute vector P. */
159
160 if (num_iter > 1) {
161 beta = rho / rho1 * (alpha / omega);
162 #pragma omp parallel for private(i0) schedule(static)
163 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
164 } else {
165 #pragma omp parallel for private(i0) schedule(static)
166 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
167 }
168
169 /* Compute direction adjusting vector PHAT and scalar ALPHA. */
170
171 Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
172 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0],PASO_ZERO, &v[0]);
173
174 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
175 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
176 #ifdef PASO_MPI
177 loc_sum[0] = sum_2;
178 MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
179 #endif
180 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
181 alpha = rho / sum_2;
182
183 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
184 for (i0 = 0; i0 < n; i0++) {
185 r[i0] -= alpha * v[i0];
186 s[i0] = r[i0];
187 sum_3 += s[i0] * s[i0];
188 }
189 #ifdef PASO_MPI
190 loc_sum[0] = sum_3;
191 MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
192 #endif
193 norm_of_residual = sqrt(sum_3);
194
195 /* Early check for tolerance. */
196 if ( (convergeFlag = (norm_of_residual <= tol)) ) {
197 #pragma omp parallel for private(i0) schedule(static)
198 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
199 maxIterFlag = FALSE;
200 breakFlag = FALSE;
201 } else {
202 /* Compute stabilizer vector SHAT and scalar OMEGA. */
203 Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
204 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]);
205
206 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
207 for (i0 = 0; i0 < n; i0++) {
208 omegaNumtr +=t[i0] * s[i0];
209 omegaDenumtr += t[i0] * t[i0];
210 }
211 #ifdef PASO_MPI
212 loc_sum[0] = omegaNumtr;
213 loc_sum[1] = omegaDenumtr;
214 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
215 omegaNumtr=sum[0];
216 omegaDenumtr=sum[1];
217 #endif
218 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
219 omega = omegaNumtr / omegaDenumtr;
220
221 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
222 for (i0 = 0; i0 < n; i0++) {
223 x[i0] += alpha * phat[i0] + omega * shat[i0];
224 r[i0] = s[i0]-omega * t[i0];
225 sum_4 += r[i0] * r[i0];
226 }
227 #ifdef PASO_MPI
228 loc_sum[0] = sum_4;
229 MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
230 #endif
231 norm_of_residual = sqrt(sum_4);
232 convergeFlag = norm_of_residual <= tol;
233 maxIterFlag = num_iter == maxit;
234 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
235 }
236 }
237 }
238 if (!(convergeFlag || maxIterFlag || breakFlag)) {
239 rho1 = rho;
240 goto L10;
241 }
242 }
243 /* end of iteration */
244 num_iter_global=num_iter;
245 norm_of_residual_global=norm_of_residual;
246 if (maxIterFlag) {
247 status = SOLVER_MAXITER_REACHED;
248 } else if (breakFlag) {
249 status = SOLVER_BREAKDOWN;
250 }
251 }
252 TMPMEMFREE(rtld);
253 TMPMEMFREE(p);
254 TMPMEMFREE(v);
255 TMPMEMFREE(t);
256 TMPMEMFREE(phat);
257 TMPMEMFREE(shat);
258 TMPMEMFREE(s);
259 *iter=num_iter_global;
260 *resid=norm_of_residual_global;
261
262 /* End of BICGSTAB */
263 return status;
264 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26