/[escript]/branches/windows_from_1456_trunk_1574_merged_in/paso/src/BiCGStab.c
ViewVC logotype

Contents of /branches/windows_from_1456_trunk_1574_merged_in/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1580 - (show annotations)
Tue Jun 3 14:03:40 2008 UTC (11 years, 4 months ago) by phornby
File MIME type: text/plain
File size: 8296 byte(s)
This once again compiles and links under windows after merging to trunk version 1574. scons run_tests also passes now. py_tests still fails in the same way.  Had to link against winsock2 to get gethostname(), and had to re-do some work on eliminating unused vars. Also eliminated signed/unsigned comparisons where I saw them.
1
2 /* $Id$ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /*
17 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
18 */
19
20 #include "Paso.h"
21 #include "SystemMatrix.h"
22 #include "Solver.h"
23 #ifdef _OPENMP
24 #include <omp.h>
25 #endif
26
27 /* -- Iterative template routine --
28 * Univ. of Tennessee and Oak Ridge National Laboratory
29 * October 1, 1993
30 * Details of this algorithm are described in "Templates for the
31 * Solution of Linear Systems: Building Blocks for Iterative
32 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
33 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
34 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
35 *
36 * Purpose
37 * =======
38 *
39 * BICGSTAB solves the linear system A*x = b using the
40 * BiConjugate Gradient Stabilized iterative method with
41 * preconditioning.
42 *
43 * Convergence test: norm( b - A*x )< TOL.
44 * For other measures, see the above reference.
45 *
46 * Arguments
47 * =========
48 *
49 * A (input)
50 *
51 * R (input) DOUBLE PRECISION array, dimension N.
52 * On entry, residual of inital guess X
53 *
54 * X (input/output) DOUBLE PRECISION array, dimension N.
55 * On input, the initial guess.
56 *
57 * ITER (input/output) INT
58 * On input, the maximum iterations to be performed.
59 * On output, actual number of iterations performed.
60 *
61 * RESID (input/output) DOUBLE PRECISION
62 * On input, the allowable convergence measure for
63 * norm( b - A*x )
64 * On output, the final value of this measure.
65 *
66 * return value
67 *
68 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
69 * = SOLVER_MAXITER_REACHED
70 * = SOLVER_INPUT_ERROR Illegal parameter:
71 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
72 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
73 *
74 * ==============================================================
75 */
76
77 err_t Paso_Solver_BiCGStab(
78 Paso_SystemMatrix * A,
79 double * r,
80 double * x,
81 dim_t *iter,
82 double * tolerance,
83 Paso_Performance* pp) {
84
85
86 /* Local variables */
87 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
88 double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
89 double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;
90 #ifdef PASO_MPI
91 double loc_sum[2], sum[2];
92 #endif
93 dim_t num_iter=0,maxit,num_iter_global;
94 dim_t i0;
95 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
96 dim_t status = SOLVER_NO_ERROR;
97 double *resid = tolerance;
98 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
99
100 /* Executable Statements */
101
102 /* allocate memory: */
103 rtld=TMPMEMALLOC(n,double);
104 p=TMPMEMALLOC(n,double);
105 v=TMPMEMALLOC(n,double);
106 t=TMPMEMALLOC(n,double);
107 phat=TMPMEMALLOC(n,double);
108 shat=TMPMEMALLOC(n,double);
109 s=TMPMEMALLOC(n,double);
110 /* Test the input parameters. */
111
112 if (n < 0) {
113 status = SOLVER_INPUT_ERROR;
114 } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
115 status = SOLVER_MEMORY_ERROR;
116 } else {
117
118 /* now bicgstab starts : */
119 maxit = *iter;
120 tol = *resid;
121
122 num_iter =0;
123 convergeFlag=FALSE;
124 maxIterFlag=FALSE;
125 breakFlag=FALSE;
126
127 /* initialize arrays */
128
129 #pragma omp parallel for private(i0) schedule(static)
130 for (i0 = 0; i0 < n; i0++) {
131 rtld[i0]=0;
132 p[i0]=0;
133 v[i0]=0;
134 t[i0]=0;
135 phat[i0]=0;
136 shat[i0]=0;
137 rtld[i0] = r[i0];
138 }
139
140 /* Perform BiConjugate Gradient Stabilized iteration. */
141
142 L10:
143 ++(num_iter);
144 sum_1 = 0;
145 sum_2 = 0;
146 sum_3 = 0;
147 sum_4 = 0;
148 omegaNumtr = 0.0;
149 omegaDenumtr = 0.0;
150 #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
151 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
152 #ifdef PASO_MPI
153 loc_sum[0] = sum_1;
154 MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
155 #endif
156 rho = sum_1;
157
158 if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
159 /* Compute vector P. */
160
161 if (num_iter > 1) {
162 beta = rho / rho1 * (alpha / omega);
163 #pragma omp parallel for private(i0) schedule(static)
164 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
165 } else {
166 #pragma omp parallel for private(i0) schedule(static)
167 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
168 }
169
170 /* Compute direction adjusting vector PHAT and scalar ALPHA. */
171
172 Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
173 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
174
175 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
176 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
177 #ifdef PASO_MPI
178 loc_sum[0] = sum_2;
179 MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
180 #endif
181 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
182 alpha = rho / sum_2;
183
184 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
185 for (i0 = 0; i0 < n; i0++) {
186 r[i0] -= alpha * v[i0];
187 s[i0] = r[i0];
188 sum_3 += s[i0] * s[i0];
189 }
190 #ifdef PASO_MPI
191 loc_sum[0] = sum_3;
192 MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
193 #endif
194 norm_of_residual = sqrt(sum_3);
195
196 /* Early check for tolerance. */
197 if ( (convergeFlag = (norm_of_residual <= tol)) ) {
198 #pragma omp parallel for private(i0) schedule(static)
199 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
200 maxIterFlag = FALSE;
201 breakFlag = FALSE;
202 } else {
203 /* Compute stabilizer vector SHAT and scalar OMEGA. */
204 Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
205 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
206
207 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
208 for (i0 = 0; i0 < n; i0++) {
209 omegaNumtr +=t[i0] * s[i0];
210 omegaDenumtr += t[i0] * t[i0];
211 }
212 #ifdef PASO_MPI
213 loc_sum[0] = omegaNumtr;
214 loc_sum[1] = omegaDenumtr;
215 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
216 omegaNumtr=sum[0];
217 omegaDenumtr=sum[1];
218 #endif
219 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
220 omega = omegaNumtr / omegaDenumtr;
221
222 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
223 for (i0 = 0; i0 < n; i0++) {
224 x[i0] += alpha * phat[i0] + omega * shat[i0];
225 r[i0] = s[i0]-omega * t[i0];
226 sum_4 += r[i0] * r[i0];
227 }
228 #ifdef PASO_MPI
229 loc_sum[0] = sum_4;
230 MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
231 #endif
232 norm_of_residual = sqrt(sum_4);
233 convergeFlag = norm_of_residual <= tol;
234 maxIterFlag = num_iter == maxit;
235 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
236 }
237 }
238 }
239 if (!(convergeFlag || maxIterFlag || breakFlag)) {
240 rho1 = rho;
241 goto L10;
242 }
243 }
244 /* end of iteration */
245 num_iter_global=num_iter;
246 norm_of_residual_global=norm_of_residual;
247 if (maxIterFlag) {
248 status = SOLVER_MAXITER_REACHED;
249 } else if (breakFlag) {
250 status = SOLVER_BREAKDOWN;
251 }
252 }
253 TMPMEMFREE(rtld);
254 TMPMEMFREE(p);
255 TMPMEMFREE(v);
256 TMPMEMFREE(t);
257 TMPMEMFREE(phat);
258 TMPMEMFREE(shat);
259 TMPMEMFREE(s);
260 *iter=num_iter_global;
261 *resid=norm_of_residual_global;
262
263 /* End of BICGSTAB */
264 return status;
265 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26