/[escript]/trunk-mpi-branch/paso/src/BiCGStab.c
ViewVC logotype

Contents of /trunk-mpi-branch/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1087 - (show annotations)
Thu Apr 12 10:01:47 2007 UTC (12 years, 2 months ago) by gross
File MIME type: text/plain
File size: 8374 byte(s)
the MPI version of PASO.PCG is running. 
There is a bug in the rectangular mesh generators but they need to be
revised in any case to clean up the code.


1 /* $Id$ */
2
3 /*
4 ********************************************************************************
5 * Copyright 2006 by ACcESS MNRF *
6 * *
7 * http://www.access.edu.au *
8 * Primary Business: Queensland, Australia *
9 * Licensed under the Open Software License version 3.0 *
10 * http://www.opensource.org/licenses/osl-3.0.php *
11 ********************************************************************************
12 */
13
14 /*
15 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
16 */
17
18 #include "Paso.h"
19 #include "SystemMatrix.h"
20 #include "Solver.h"
21 #ifdef _OPENMP
22 #include <omp.h>
23 #endif
24
25 /* -- Iterative template routine --
26 * Univ. of Tennessee and Oak Ridge National Laboratory
27 * October 1, 1993
28 * Details of this algorithm are described in "Templates for the
29 * Solution of Linear Systems: Building Blocks for Iterative
30 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
31 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
32 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
33 *
34 * Purpose
35 * =======
36 *
37 * BICGSTAB solves the linear system A*x = b using the
38 * BiConjugate Gradient Stabilized iterative method with
39 * preconditioning.
40 *
41 * Convergence test: norm( b - A*x )< TOL.
42 * For other measures, see the above reference.
43 *
44 * Arguments
45 * =========
46 *
47 * A (input)
48 *
49 * R (input) DOUBLE PRECISION array, dimension N.
50 * On entry, residual of inital guess X
51 *
52 * X (input/output) DOUBLE PRECISION array, dimension N.
53 * On input, the initial guess.
54 *
55 * ITER (input/output) INT
56 * On input, the maximum iterations to be performed.
57 * On output, actual number of iterations performed.
58 *
59 * RESID (input/output) DOUBLE PRECISION
60 * On input, the allowable convergence measure for
61 * norm( b - A*x )
62 * On output, the final value of this measure.
63 *
64 * return value
65 *
66 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
67 * = SOLVER_MAXITER_REACHED
68 * = SOLVER_INPUT_ERROR Illegal parameter:
69 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
70 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
71 *
72 * ==============================================================
73 */
74
75 err_t Paso_Solver_BiCGStab(
76 Paso_SystemMatrix * A,
77 double * r,
78 double * x,
79 dim_t *iter,
80 double * tolerance,
81 double *buffer0, double *buffer1,
82 Paso_Performance* pp) {
83
84
85 /* Local variables */
86 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
87 double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
88 double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;
89 dim_t num_iter=0,maxit,num_iter_global;
90 dim_t i0;
91 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
92 dim_t status = SOLVER_NO_ERROR;
93
94 /* adapt original routine parameters */
95 dim_t l = A->maxNumCols * A-> col_block_size;;
96 dim_t n = A->myNumCols * A-> col_block_size;;
97 double * resid = tolerance;
98
99 /* Executable Statements */
100
101 /* allocate memory: */
102 rtld=TMPMEMALLOC(l,double);
103 p=TMPMEMALLOC(l,double);
104 v=TMPMEMALLOC(l,double);
105 t=TMPMEMALLOC(l,double);
106 phat=TMPMEMALLOC(l,double);
107 shat=TMPMEMALLOC(l,double);
108 s=TMPMEMALLOC(l,double);
109 /* Test the input parameters. */
110
111 if (n < 0) {
112 status = SOLVER_INPUT_ERROR;
113 } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
114 status = SOLVER_MEMORY_ERROR;
115 } else {
116
117 /* now bicgstab starts : */
118 maxit = *iter;
119 tol = *resid;
120
121 #pragma omp parallel firstprivate(maxit,tol) \
122 private(rho,omega,num_iter,norm_of_residual,beta,alpha,rho1, convergeFlag,maxIterFlag,breakFlag)
123 {
124 num_iter =0;
125 convergeFlag=FALSE;
126 maxIterFlag=FALSE;
127 breakFlag=FALSE;
128
129 /* initialize arrays */
130
131 #pragma omp for private(i0) schedule(static)
132 for (i0 = 0; i0 < n; i0++) {
133 rtld[i0]=0;
134 p[i0]=0;
135 v[i0]=0;
136 t[i0]=0;
137 phat[i0]=0;
138 shat[i0]=0;
139 }
140 #pragma omp for private(i0) schedule(static)
141 for (i0 = 0; i0 < n; i0++) rtld[i0] = r[i0];
142
143 /* Perform BiConjugate Gradient Stabilized iteration. */
144
145 L10:
146 ++(num_iter);
147 #pragma omp barrier
148 #pragma omp master
149 {
150 sum_1 = 0;
151 sum_2 = 0;
152 sum_3 = 0;
153 sum_4 = 0;
154 omegaNumtr = 0.0;
155 omegaDenumtr = 0.0;
156 }
157 #pragma omp barrier
158 #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
159 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
160 rho = sum_1;
161
162 if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
163 /* Compute vector P. */
164
165 if (num_iter > 1) {
166 beta = rho / rho1 * (alpha / omega);
167 #pragma omp for private(i0) schedule(static)
168 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
169 } else {
170 #pragma omp for private(i0) schedule(static)
171 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
172 }
173
174 /* Compute direction adjusting vector PHAT and scalar ALPHA. */
175
176 Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
177 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0], buffer0, buffer1);
178
179 #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
180 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
181 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
182 alpha = rho / sum_2;
183
184 #pragma omp for private(i0) reduction(+:sum_3) schedule(static)
185 for (i0 = 0; i0 < n; i0++) {
186 r[i0] -= alpha * v[i0];
187 s[i0] = r[i0];
188 sum_3 += s[i0] * s[i0];
189 }
190 norm_of_residual = sqrt(sum_3);
191
192 /* Early check for tolerance. */
193 if ( (convergeFlag = (norm_of_residual <= tol)) ) {
194 #pragma omp for private(i0) schedule(static)
195 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
196 maxIterFlag = FALSE;
197 breakFlag = FALSE;
198 } else {
199 /* Compute stabilizer vector SHAT and scalar OMEGA. */
200 Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
201 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0], buffer0, buffer1);
202
203 #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
204 for (i0 = 0; i0 < n; i0++) {
205 omegaNumtr +=t[i0] * s[i0];
206 omegaDenumtr += t[i0] * t[i0];
207 }
208 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
209 omega = omegaNumtr / omegaDenumtr;
210
211 #pragma omp for private(i0) reduction(+:sum_4) schedule(static)
212 for (i0 = 0; i0 < n; i0++) {
213 x[i0] += alpha * phat[i0] + omega * shat[i0];
214 r[i0] -= omega * t[i0];
215 sum_4 += r[i0] * r[i0];
216 }
217 norm_of_residual = sqrt(sum_4);
218 convergeFlag = norm_of_residual <= tol;
219 maxIterFlag = num_iter == maxit;
220 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
221 }
222 }
223 }
224 if (!(convergeFlag || maxIterFlag || breakFlag)) {
225 rho1 = rho;
226 goto L10;
227 }
228 }
229 /* end of iteration */
230 #pragma omp master
231 {
232 num_iter_global=num_iter;
233 norm_of_residual_global=norm_of_residual;
234 if (maxIterFlag) {
235 status = SOLVER_MAXITER_REACHED;
236 } else if (breakFlag) {
237 status = SOLVER_BREAKDOWN;
238 }
239 }
240 } /* end of parallel region */
241 }
242 TMPMEMFREE(rtld);
243 TMPMEMFREE(p);
244 TMPMEMFREE(v);
245 TMPMEMFREE(t);
246 TMPMEMFREE(phat);
247 TMPMEMFREE(shat);
248 TMPMEMFREE(s);
249 *iter=num_iter_global;
250 *resid=norm_of_residual_global;
251
252 /* End of BICGSTAB */
253 return status;
254 }
255 /*
256 * $Log$
257 * Revision 1.2 2005/09/15 03:44:40 jgs
258 * Merge of development branch dev-02 back to main trunk on 2005-09-15
259 *
260 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
261 * These files have been extracted from finley to define a stand alone libray for iterative
262 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
263 * has not been tested yet.
264 *
265 *
266 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26