/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Annotation of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (hide annotations)
Fri Jul 11 13:12:46 2008 UTC (11 years, 7 months ago) by phornby
File MIME type: text/plain
File size: 8316 byte(s)

Merge in /branches/windows_from_1456_trunk_1620_merged_in branch.

You will find a preserved pre-merge trunk in tags under tags/trunk_at_1625.
That will be useful for diffing & checking on my stupidity.

Here is a list of the conflicts and their resolution at this
point in time.


=================================================================================
(LLWS == looks like white space).

finley/src/Assemble_addToSystemMatrix.c - resolve to branch - unused var. may be wrong.....
finley/src/CPPAdapter/SystemMatrixAdapter.cpp - resolve to branch - LLWS
finley/src/CPPAdapter/MeshAdapter.cpp - resolve to branch - LLWS
paso/src/PCG.c - resolve to branch - unused var fixes.
paso/src/SolverFCT.c - resolve to branch - LLWS
paso/src/FGMRES.c - resolve to branch - LLWS
paso/src/Common.h - resolve to trunk version. It's omp.h's include... not sure it's needed,
but for the sake of saftey.....
paso/src/Functions.c - resolve to branch version, indentation/tab removal and return error
on bad unimplemented Paso_FunctionCall.
paso/src/SolverFCT_solve.c - resolve to branch version, unused vars
paso/src/SparseMatrix_MatrixVector.c - resolve to branch version, unused vars.
escript/src/Utils.cpp - resloved to branch, needs WinSock2.h
escript/src/DataExpanded.cpp - resolved to branch version - LLWS
escript/src/DataFactory.cpp - resolve to branch version
=================================================================================

This currently passes tests on linux (debian), but is not checked on windows or Altix yet.

This checkin is to make a trunk I can check out for windows to do tests on it.

Known outstanding problem is in the operator=() method of exceptions
causing warning messages on the intel compilers.

May the God of doughnuts have mercy on my soul.


1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16     /*
17 jgs 150 Crude modifications and translations for Paso by Matt Davies and Lutz Gross
18     */
19    
20 gross 700 #include "Paso.h"
21     #include "SystemMatrix.h"
22 jgs 150 #include "Solver.h"
23     #ifdef _OPENMP
24     #include <omp.h>
25     #endif
26    
27     /* -- Iterative template routine --
28     * Univ. of Tennessee and Oak Ridge National Laboratory
29     * October 1, 1993
30     * Details of this algorithm are described in "Templates for the
31     * Solution of Linear Systems: Building Blocks for Iterative
32     * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
33     * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
34     * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
35     *
36     * Purpose
37     * =======
38     *
39     * BICGSTAB solves the linear system A*x = b using the
40     * BiConjugate Gradient Stabilized iterative method with
41     * preconditioning.
42     *
43     * Convergence test: norm( b - A*x )< TOL.
44     * For other measures, see the above reference.
45     *
46     * Arguments
47     * =========
48     *
49     * A (input)
50     *
51     * R (input) DOUBLE PRECISION array, dimension N.
52     * On entry, residual of inital guess X
53     *
54     * X (input/output) DOUBLE PRECISION array, dimension N.
55     * On input, the initial guess.
56     *
57     * ITER (input/output) INT
58     * On input, the maximum iterations to be performed.
59     * On output, actual number of iterations performed.
60     *
61     * RESID (input/output) DOUBLE PRECISION
62     * On input, the allowable convergence measure for
63     * norm( b - A*x )
64     * On output, the final value of this measure.
65     *
66     * return value
67     *
68     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
69     * = SOLVER_MAXITER_REACHED
70     * = SOLVER_INPUT_ERROR Illegal parameter:
71     * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
72     * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
73     *
74     * ==============================================================
75     */
76    
77     err_t Paso_Solver_BiCGStab(
78     Paso_SystemMatrix * A,
79     double * r,
80     double * x,
81     dim_t *iter,
82 gross 584 double * tolerance,
83     Paso_Performance* pp) {
84 jgs 150
85    
86     /* Local variables */
87 ksteube 1312 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
88 jgs 150 double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
89 gross 1553 double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1, loc_sum[2], sum[2];
90 phornby 1628 #ifdef PASO_MPI
91     double loc_sum[2], sum[2];
92     #endif
93 jgs 150 dim_t num_iter=0,maxit,num_iter_global;
94 ksteube 1312 dim_t i0;
95 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
96     dim_t status = SOLVER_NO_ERROR;
97 gross 1028 double *resid = tolerance;
98 ksteube 1312 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
99 jgs 150
100     /* Executable Statements */
101    
102     /* allocate memory: */
103     rtld=TMPMEMALLOC(n,double);
104     p=TMPMEMALLOC(n,double);
105     v=TMPMEMALLOC(n,double);
106     t=TMPMEMALLOC(n,double);
107     phat=TMPMEMALLOC(n,double);
108     shat=TMPMEMALLOC(n,double);
109     s=TMPMEMALLOC(n,double);
110     /* Test the input parameters. */
111    
112     if (n < 0) {
113     status = SOLVER_INPUT_ERROR;
114     } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) {
115     status = SOLVER_MEMORY_ERROR;
116     } else {
117    
118     /* now bicgstab starts : */
119     maxit = *iter;
120     tol = *resid;
121    
122 gross 1556 num_iter =0;
123     convergeFlag=FALSE;
124     maxIterFlag=FALSE;
125     breakFlag=FALSE;
126 jgs 150
127 gross 1556 /* initialize arrays */
128    
129     #pragma omp parallel for private(i0) schedule(static)
130     for (i0 = 0; i0 < n; i0++) {
131 jgs 150 rtld[i0]=0;
132     p[i0]=0;
133     v[i0]=0;
134     t[i0]=0;
135     phat[i0]=0;
136     shat[i0]=0;
137 gross 1556 rtld[i0] = r[i0];
138     }
139 jgs 150
140 gross 1556 /* Perform BiConjugate Gradient Stabilized iteration. */
141 jgs 150
142     L10:
143     ++(num_iter);
144     sum_1 = 0;
145     sum_2 = 0;
146     sum_3 = 0;
147     sum_4 = 0;
148     omegaNumtr = 0.0;
149 gross 1556 omegaDenumtr = 0.0;
150     #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
151 jgs 150 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
152 gross 1556 #ifdef PASO_MPI
153     loc_sum[0] = sum_1;
154     MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
155 gross 1553 #endif
156 jgs 150 rho = sum_1;
157    
158     if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
159     /* Compute vector P. */
160    
161     if (num_iter > 1) {
162     beta = rho / rho1 * (alpha / omega);
163 gross 1556 #pragma omp parallel for private(i0) schedule(static)
164 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
165     } else {
166 gross 1556 #pragma omp parallel for private(i0) schedule(static)
167 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
168     }
169    
170     /* Compute direction adjusting vector PHAT and scalar ALPHA. */
171    
172     Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
173 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
174 jgs 150
175 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
176 jgs 150 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
177 gross 1553 #ifdef PASO_MPI
178 gross 1556 loc_sum[0] = sum_2;
179     MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
180 gross 1553 #endif
181 jgs 150 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
182     alpha = rho / sum_2;
183    
184 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
185 jgs 150 for (i0 = 0; i0 < n; i0++) {
186     r[i0] -= alpha * v[i0];
187     s[i0] = r[i0];
188     sum_3 += s[i0] * s[i0];
189     }
190 gross 1553 #ifdef PASO_MPI
191 gross 1556 loc_sum[0] = sum_3;
192     MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
193 gross 1553 #endif
194 jgs 150 norm_of_residual = sqrt(sum_3);
195    
196     /* Early check for tolerance. */
197     if ( (convergeFlag = (norm_of_residual <= tol)) ) {
198 gross 1556 #pragma omp parallel for private(i0) schedule(static)
199 jgs 150 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
200     maxIterFlag = FALSE;
201     breakFlag = FALSE;
202     } else {
203     /* Compute stabilizer vector SHAT and scalar OMEGA. */
204     Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
205 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
206 jgs 150
207 gross 1556 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
208 jgs 150 for (i0 = 0; i0 < n; i0++) {
209     omegaNumtr +=t[i0] * s[i0];
210     omegaDenumtr += t[i0] * t[i0];
211     }
212 gross 1553 #ifdef PASO_MPI
213 gross 1556 loc_sum[0] = omegaNumtr;
214     loc_sum[1] = omegaDenumtr;
215     MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
216     omegaNumtr=sum[0];
217     omegaDenumtr=sum[1];
218 gross 1553 #endif
219 jgs 150 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
220     omega = omegaNumtr / omegaDenumtr;
221    
222 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
223 jgs 150 for (i0 = 0; i0 < n; i0++) {
224     x[i0] += alpha * phat[i0] + omega * shat[i0];
225 artak 1425 r[i0] = s[i0]-omega * t[i0];
226 jgs 150 sum_4 += r[i0] * r[i0];
227     }
228 gross 1553 #ifdef PASO_MPI
229 gross 1556 loc_sum[0] = sum_4;
230     MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
231 gross 1553 #endif
232 jgs 150 norm_of_residual = sqrt(sum_4);
233     convergeFlag = norm_of_residual <= tol;
234     maxIterFlag = num_iter == maxit;
235     breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS);
236     }
237     }
238     }
239     if (!(convergeFlag || maxIterFlag || breakFlag)) {
240     rho1 = rho;
241     goto L10;
242     }
243     }
244     /* end of iteration */
245 gross 1556 num_iter_global=num_iter;
246     norm_of_residual_global=norm_of_residual;
247     if (maxIterFlag) {
248 jgs 150 status = SOLVER_MAXITER_REACHED;
249 gross 1556 } else if (breakFlag) {
250 jgs 150 status = SOLVER_BREAKDOWN;
251     }
252     }
253     TMPMEMFREE(rtld);
254     TMPMEMFREE(p);
255     TMPMEMFREE(v);
256     TMPMEMFREE(t);
257     TMPMEMFREE(phat);
258     TMPMEMFREE(shat);
259     TMPMEMFREE(s);
260     *iter=num_iter_global;
261     *resid=norm_of_residual_global;
262    
263     /* End of BICGSTAB */
264     return status;
265     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26