/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Annotation of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2484 - (hide annotations)
Mon Jun 22 04:22:19 2009 UTC (9 years, 9 months ago) by gross
File MIME type: text/plain
File size: 8297 byte(s)
numarray removed from docu; Locator revised.
1 ksteube 1312
2     /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 dhawcroft 631
14 ksteube 1811
15 dhawcroft 631 /*
16 jgs 150 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
17     */
18    
19 gross 700 #include "Paso.h"
20     #include "SystemMatrix.h"
21 jgs 150 #include "Solver.h"
22     #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25    
26     /* -- Iterative template routine --
27     * Univ. of Tennessee and Oak Ridge National Laboratory
28     * October 1, 1993
29     * Details of this algorithm are described in "Templates for the
30     * Solution of Linear Systems: Building Blocks for Iterative
31     * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
32     * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
33     * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
34     *
35     * Purpose
36     * =======
37     *
38     * BICGSTAB solves the linear system A*x = b using the
39     * BiConjugate Gradient Stabilized iterative method with
40     * preconditioning.
41     *
42     * Convergence test: norm( b - A*x )< TOL.
43     * For other measures, see the above reference.
44     *
45     * Arguments
46     * =========
47     *
48     * A (input)
49     *
50     * R (input) DOUBLE PRECISION array, dimension N.
51     * On entry, residual of inital guess X
52     *
53     * X (input/output) DOUBLE PRECISION array, dimension N.
54     * On input, the initial guess.
55     *
56     * ITER (input/output) INT
57     * On input, the maximum iterations to be performed.
58     * On output, actual number of iterations performed.
59     *
60     * RESID (input/output) DOUBLE PRECISION
61     * On input, the allowable convergence measure for
62     * norm( b - A*x )
63     * On output, the final value of this measure.
64     *
65     * return value
66     *
67     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
68     * = SOLVER_MAXITER_REACHED
69     * = SOLVER_INPUT_ERROR Illegal parameter:
70     * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
71     * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
72     *
73     * ==============================================================
74     */
75    
76     err_t Paso_Solver_BiCGStab(
77     Paso_SystemMatrix * A,
78     double * r,
79     double * x,
80     dim_t *iter,
81 gross 584 double * tolerance,
82     Paso_Performance* pp) {
83 jgs 150
84    
85     /* Local variables */
86 jfenwick 1986 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/
87     double beta,norm_of_residual=0,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global=0;
88     double alpha=0, omega=0, omegaNumtr, omegaDenumtr, rho, tol, rho1=0;
89 phornby 1628 #ifdef PASO_MPI
90     double loc_sum[2], sum[2];
91     #endif
92 jfenwick 1986 dim_t num_iter=0,maxit,num_iter_global=0;
93 ksteube 1312 dim_t i0;
94 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
95     dim_t status = SOLVER_NO_ERROR;
96 gross 1028 double *resid = tolerance;
97 ksteube 1312 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
98 jgs 150
99     /* Executable Statements */
100    
101     /* allocate memory: */
102     rtld=TMPMEMALLOC(n,double);
103     p=TMPMEMALLOC(n,double);
104     v=TMPMEMALLOC(n,double);
105     t=TMPMEMALLOC(n,double);
106     phat=TMPMEMALLOC(n,double);
107     shat=TMPMEMALLOC(n,double);
108     s=TMPMEMALLOC(n,double);
109     /* Test the input parameters. */
110    
111     if (n < 0) {
112     status = SOLVER_INPUT_ERROR;
113     } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
114     status = SOLVER_MEMORY_ERROR;
115     } else {
116    
117     /* now bicgstab starts : */
118     maxit = *iter;
119     tol = *resid;
120    
121 gross 1556 num_iter =0;
122     convergeFlag=FALSE;
123     maxIterFlag=FALSE;
124     breakFlag=FALSE;
125 jgs 150
126 gross 1556 /* initialize arrays */
127    
128     #pragma omp parallel for private(i0) schedule(static)
129     for (i0 = 0; i0 < n; i0++) {
130 jgs 150 rtld[i0]=0;
131     p[i0]=0;
132     v[i0]=0;
133     t[i0]=0;
134     phat[i0]=0;
135     shat[i0]=0;
136 gross 1556 rtld[i0] = r[i0];
137     }
138 jgs 150
139 gross 1556 /* Perform BiConjugate Gradient Stabilized iteration. */
140 jgs 150
141     L10:
142     ++(num_iter);
143     sum_1 = 0;
144     sum_2 = 0;
145     sum_3 = 0;
146     sum_4 = 0;
147     omegaNumtr = 0.0;
148 gross 1556 omegaDenumtr = 0.0;
149     #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
150 jgs 150 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
151 gross 1556 #ifdef PASO_MPI
152     loc_sum[0] = sum_1;
153     MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
154 gross 1553 #endif
155 jgs 150 rho = sum_1;
156    
157     if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
158     /* Compute vector P. */
159    
160     if (num_iter > 1) {
161     beta = rho / rho1 * (alpha / omega);
162 gross 1556 #pragma omp parallel for private(i0) schedule(static)
163 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
164     } else {
165 gross 1556 #pragma omp parallel for private(i0) schedule(static)
166 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
167     }
168    
169     /* Compute direction adjusting vector PHAT and scalar ALPHA. */
170    
171     Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
172 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0],PASO_ZERO, &v[0]);
173 jgs 150
174 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
175 jgs 150 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
176 gross 1553 #ifdef PASO_MPI
177 gross 1556 loc_sum[0] = sum_2;
178     MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
179 gross 1553 #endif
180 jgs 150 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
181     alpha = rho / sum_2;
182    
183 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
184 jgs 150 for (i0 = 0; i0 < n; i0++) {
185     r[i0] -= alpha * v[i0];
186     s[i0] = r[i0];
187     sum_3 += s[i0] * s[i0];
188     }
189 gross 1553 #ifdef PASO_MPI
190 gross 1556 loc_sum[0] = sum_3;
191     MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
192 gross 1553 #endif
193 jgs 150 norm_of_residual = sqrt(sum_3);
194    
195     /* Early check for tolerance. */
196     if ( (convergeFlag = (norm_of_residual <= tol)) ) {
197 gross 1556 #pragma omp parallel for private(i0) schedule(static)
198 jgs 150 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
199     maxIterFlag = FALSE;
200     breakFlag = FALSE;
201     } else {
202     /* Compute stabilizer vector SHAT and scalar OMEGA. */
203     Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
204 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]);
205 jgs 150
206 gross 1556 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
207 jgs 150 for (i0 = 0; i0 < n; i0++) {
208     omegaNumtr +=t[i0] * s[i0];
209     omegaDenumtr += t[i0] * t[i0];
210     }
211 gross 1553 #ifdef PASO_MPI
212 gross 1556 loc_sum[0] = omegaNumtr;
213     loc_sum[1] = omegaDenumtr;
214     MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
215     omegaNumtr=sum[0];
216     omegaDenumtr=sum[1];
217 gross 1553 #endif
218 jgs 150 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
219     omega = omegaNumtr / omegaDenumtr;
220    
221 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
222 jgs 150 for (i0 = 0; i0 < n; i0++) {
223     x[i0] += alpha * phat[i0] + omega * shat[i0];
224 artak 1425 r[i0] = s[i0]-omega * t[i0];
225 jgs 150 sum_4 += r[i0] * r[i0];
226     }
227 gross 1553 #ifdef PASO_MPI
228 gross 1556 loc_sum[0] = sum_4;
229     MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
230 gross 1553 #endif
231 jgs 150 norm_of_residual = sqrt(sum_4);
232     convergeFlag = norm_of_residual <= tol;
233 gross 2484 maxIterFlag = num_iter > maxit;
234 jgs 150 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
235     }
236     }
237     }
238     if (!(convergeFlag || maxIterFlag || breakFlag)) {
239     rho1 = rho;
240     goto L10;
241     }
242     }
243     /* end of iteration */
244 gross 1556 num_iter_global=num_iter;
245     norm_of_residual_global=norm_of_residual;
246     if (maxIterFlag) {
247 jgs 150 status = SOLVER_MAXITER_REACHED;
248 gross 1556 } else if (breakFlag) {
249 jgs 150 status = SOLVER_BREAKDOWN;
250     }
251     }
252     TMPMEMFREE(rtld);
253     TMPMEMFREE(p);
254     TMPMEMFREE(v);
255     TMPMEMFREE(t);
256     TMPMEMFREE(phat);
257     TMPMEMFREE(shat);
258     TMPMEMFREE(s);
259     *iter=num_iter_global;
260     *resid=norm_of_residual_global;
261    
262     /* End of BICGSTAB */
263     return status;
264     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26