/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Annotation of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1553 - (hide annotations)
Thu May 8 09:38:07 2008 UTC (11 years, 6 months ago) by gross
File MIME type: text/plain
File size: 8960 byte(s)
some small bugs fixed to get MPI going with the modification. MPI version of BiCGStab added.
1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16     /*
17 jgs 150 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
18     */
19    
20 gross 700 #include "Paso.h"
21     #include "SystemMatrix.h"
22 jgs 150 #include "Solver.h"
23     #ifdef _OPENMP
24     #include <omp.h>
25     #endif
26    
27     /* -- Iterative template routine --
28     * Univ. of Tennessee and Oak Ridge National Laboratory
29     * October 1, 1993
30     * Details of this algorithm are described in "Templates for the
31     * Solution of Linear Systems: Building Blocks for Iterative
32     * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
33     * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
34     * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
35     *
36     * Purpose
37     * =======
38     *
39     * BICGSTAB solves the linear system A*x = b using the
40     * BiConjugate Gradient Stabilized iterative method with
41     * preconditioning.
42     *
43     * Convergence test: norm( b - A*x )< TOL.
44     * For other measures, see the above reference.
45     *
46     * Arguments
47     * =========
48     *
49     * A (input)
50     *
51     * R (input) DOUBLE PRECISION array, dimension N.
52     * On entry, residual of inital guess X
53     *
54     * X (input/output) DOUBLE PRECISION array, dimension N.
55     * On input, the initial guess.
56     *
57     * ITER (input/output) INT
58     * On input, the maximum iterations to be performed.
59     * On output, actual number of iterations performed.
60     *
61     * RESID (input/output) DOUBLE PRECISION
62     * On input, the allowable convergence measure for
63     * norm( b - A*x )
64     * On output, the final value of this measure.
65     *
66     * return value
67     *
68     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
69     * = SOLVER_MAXITER_REACHED
70     * = SOLVER_INPUT_ERROR Illegal parameter:
71     * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
72     * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
73     *
74     * ==============================================================
75     */
76    
77     err_t Paso_Solver_BiCGStab(
78     Paso_SystemMatrix * A,
79     double * r,
80     double * x,
81     dim_t *iter,
82 gross 584 double * tolerance,
83     Paso_Performance* pp) {
84 jgs 150
85    
86     /* Local variables */
87 ksteube 1312 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
88 jgs 150 double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
89 gross 1553 double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1, loc_sum[2], sum[2];
90 jgs 150 dim_t num_iter=0,maxit,num_iter_global;
91 ksteube 1312 dim_t i0;
92 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
93     dim_t status = SOLVER_NO_ERROR;
94 gross 1028 double *resid = tolerance;
95 ksteube 1312 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
96 jgs 150
97     /* Executable Statements */
98    
99     /* allocate memory: */
100     rtld=TMPMEMALLOC(n,double);
101     p=TMPMEMALLOC(n,double);
102     v=TMPMEMALLOC(n,double);
103     t=TMPMEMALLOC(n,double);
104     phat=TMPMEMALLOC(n,double);
105     shat=TMPMEMALLOC(n,double);
106     s=TMPMEMALLOC(n,double);
107     /* Test the input parameters. */
108    
109     if (n < 0) {
110     status = SOLVER_INPUT_ERROR;
111     } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
112     status = SOLVER_MEMORY_ERROR;
113     } else {
114    
115     /* now bicgstab starts : */
116     maxit = *iter;
117     tol = *resid;
118    
119 gross 929 #pragma omp parallel firstprivate(maxit,tol) \
120     private(rho,omega,num_iter,norm_of_residual,beta,alpha,rho1, convergeFlag,maxIterFlag,breakFlag)
121 jgs 150 {
122     num_iter =0;
123 gross 929 convergeFlag=FALSE;
124     maxIterFlag=FALSE;
125     breakFlag=FALSE;
126 jgs 150
127     /* initialize arrays */
128    
129     #pragma omp for private(i0) schedule(static)
130     for (i0 = 0; i0 < n; i0++) {
131     rtld[i0]=0;
132     p[i0]=0;
133     v[i0]=0;
134     t[i0]=0;
135     phat[i0]=0;
136     shat[i0]=0;
137     }
138     #pragma omp for private(i0) schedule(static)
139     for (i0 = 0; i0 < n; i0++) rtld[i0] = r[i0];
140    
141     /* Perform BiConjugate Gradient Stabilized iteration. */
142    
143     L10:
144     ++(num_iter);
145     #pragma omp barrier
146     #pragma omp master
147     {
148     sum_1 = 0;
149     sum_2 = 0;
150     sum_3 = 0;
151     sum_4 = 0;
152     omegaNumtr = 0.0;
153     omegaDenumtr = 0.0;
154     }
155     #pragma omp barrier
156     #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
157     for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
158 gross 1553 #ifdef PASO_MPI
159     #pragma omp master
160     {
161     loc_sum[0] = sum_1;
162     MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
163     }
164     #endif
165 jgs 150 rho = sum_1;
166    
167     if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
168     /* Compute vector P. */
169    
170     if (num_iter > 1) {
171     beta = rho / rho1 * (alpha / omega);
172     #pragma omp for private(i0) schedule(static)
173     for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
174     } else {
175     #pragma omp for private(i0) schedule(static)
176     for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
177     }
178    
179     /* Compute direction adjusting vector PHAT and scalar ALPHA. */
180    
181     Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
182 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
183 jgs 150
184     #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
185     for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
186 gross 1553 #ifdef PASO_MPI
187     #pragma omp master
188     {
189     loc_sum[0] = sum_2;
190     MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
191     }
192     #endif
193 jgs 150 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
194     alpha = rho / sum_2;
195    
196     #pragma omp for private(i0) reduction(+:sum_3) schedule(static)
197     for (i0 = 0; i0 < n; i0++) {
198     r[i0] -= alpha * v[i0];
199     s[i0] = r[i0];
200     sum_3 += s[i0] * s[i0];
201     }
202 gross 1553 #ifdef PASO_MPI
203     #pragma omp master
204     {
205     loc_sum[0] = sum_3;
206     MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
207     }
208     #endif
209 jgs 150 norm_of_residual = sqrt(sum_3);
210    
211     /* Early check for tolerance. */
212     if ( (convergeFlag = (norm_of_residual <= tol)) ) {
213     #pragma omp for private(i0) schedule(static)
214     for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
215     maxIterFlag = FALSE;
216     breakFlag = FALSE;
217     } else {
218     /* Compute stabilizer vector SHAT and scalar OMEGA. */
219     Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
220 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
221 jgs 150
222     #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
223     for (i0 = 0; i0 < n; i0++) {
224     omegaNumtr +=t[i0] * s[i0];
225     omegaDenumtr += t[i0] * t[i0];
226     }
227 gross 1553 #ifdef PASO_MPI
228     #pragma omp master
229     {
230     loc_sum[0] = omegaNumtr;
231     loc_sum[1] = omegaDenumtr;
232     MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
233     omegaNumtr=sum[0];
234     omegaDenumtr=sum[1];
235     }
236     #endif
237 jgs 150 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
238     omega = omegaNumtr / omegaDenumtr;
239    
240     #pragma omp for private(i0) reduction(+:sum_4) schedule(static)
241     for (i0 = 0; i0 < n; i0++) {
242     x[i0] += alpha * phat[i0] + omega * shat[i0];
243 artak 1425 r[i0] = s[i0]-omega * t[i0];
244 jgs 150 sum_4 += r[i0] * r[i0];
245     }
246 gross 1553 #ifdef PASO_MPI
247     #pragma omp master
248     {
249     loc_sum[0] = sum_4;
250     MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
251     }
252     #endif
253 jgs 150 norm_of_residual = sqrt(sum_4);
254     convergeFlag = norm_of_residual <= tol;
255     maxIterFlag = num_iter == maxit;
256     breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
257     }
258     }
259     }
260     if (!(convergeFlag || maxIterFlag || breakFlag)) {
261     rho1 = rho;
262     goto L10;
263     }
264     }
265     /* end of iteration */
266     #pragma omp master
267     {
268     num_iter_global=num_iter;
269     norm_of_residual_global=norm_of_residual;
270     if (maxIterFlag) {
271     status = SOLVER_MAXITER_REACHED;
272     } else if (breakFlag) {
273     status = SOLVER_BREAKDOWN;
274     }
275     }
276     } /* end of parallel region */
277     }
278     TMPMEMFREE(rtld);
279     TMPMEMFREE(p);
280     TMPMEMFREE(v);
281     TMPMEMFREE(t);
282     TMPMEMFREE(phat);
283     TMPMEMFREE(shat);
284     TMPMEMFREE(s);
285     *iter=num_iter_global;
286     *resid=norm_of_residual_global;
287    
288     /* End of BICGSTAB */
289     return status;
290     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26