/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1703 - (hide annotations)
Thu Aug 14 05:34:25 2008 UTC (11 years, 3 months ago) by artak
File MIME type: text/plain
File size: 7482 byte(s)
TFQMR solver is added to PASO solver. It is not parallelised yet.
1 artak 1703
2     /*******************************************************
3     *
4     * Copyright 2003-2007 by ACceSS MNRF
5     * Copyright 2007 by University of Queensland
6     *
7     * http://esscc.uq.edu.au
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /* TFQMR iterations */
15    
16     #include "SystemMatrix.h"
17     #include "Paso.h"
18     #include "Solver.h"
19     #include "PasoUtil.h"
20    
21     #ifdef _OPENMP
22     #include <omp.h>
23     #endif
24    
25     #ifdef PASO_MPI
26     #include <mpi.h>
27     #endif
28    
29     /*
30     *
31     * Purpose
32     * =======
33     *
34     * TFQMR solves the linear system A*x = b
35     *
36     * Convergence test: norm( b - A*x )< TOL.
37     * For other measures, see the above reference.
38     *
39     * Arguments
40     * =========
41     *
42     * r (input) DOUBLE PRECISION array, dimension N.
43     * On entry, residual of inital guess x
44     *
45     * x (input/output) DOUBLE PRECISION array, dimension N.
46     * On input, the initial guess.
47     *
48     * ITER (input/output) INT
49     * On input, the maximum iterations to be performed.
50     * On output, actual number of iterations performed.
51     *
52     * INFO (output) INT
53     *
54     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
55     * = SOLVEr_MAXITER_REACHED
56     * = SOLVER_INPUT_ERROR Illegal parameter:
57     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
58     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
59     *
60     * ==============================================================
61     */
62    
63     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
64    
65     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
66     #define USE_DYNAMIC_SCHEDULING
67     #endif
68    
69     err_t Paso_Solver_TFQMR(
70     Paso_SystemMatrix * A,
71     double * r,
72     double * x,
73     dim_t *iter,
74     double * tolerance,
75     Paso_Performance* pp) {
76    
77     /* Local variables */
78     dim_t num_iter=0,maxit;
79     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
80     err_t status = SOLVER_NO_ERROR;
81     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
82     double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ;
83    
84     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
85    
86     double norm_of_residual;
87    
88     /* */
89     /*-----------------------------------------------------------------*/
90     /* */
91     /* Start of Calculation : */
92     /* --------------------- */
93     /* */
94     /* */
95     u1=TMPMEMALLOC(n,double);
96     u2=TMPMEMALLOC(n,double);
97     y1=TMPMEMALLOC(n,double);
98     y2=TMPMEMALLOC(n,double);
99     d=TMPMEMALLOC(n,double);
100     w=TMPMEMALLOC(n,double);
101     v=TMPMEMALLOC(n,double);
102     v_old=TMPMEMALLOC(n,double);
103    
104    
105     tmp=TMPMEMALLOC(n,double);
106    
107    
108     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
109     status=SOLVER_MEMORY_ERROR;
110     }
111    
112     maxit = *iter;
113    
114     /* Test the input parameters. */
115     if (n < 0 || maxit<=0 ) {
116     status=SOLVER_INPUT_ERROR;
117     }
118    
119     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
120     Paso_Solver_solvePreconditioner(A,r,r);
121     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
122    
123     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
124    
125     Paso_zeroes(n,u2);
126     Paso_zeroes(n,y2);
127    
128     Paso_Copy(n,w,r);
129     Paso_Copy(n,y1,r);
130    
131     Paso_zeroes(n,d);
132    
133     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
134     Performance_startMonitor(pp,PERFORMANCE_MVM);
135     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v);
136     Performance_stopMonitor(pp,PERFORMANCE_MVM);
137     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
138    
139     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
140     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
141     Paso_Solver_solvePreconditioner(A,v,v);
142     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
143     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
144    
145     Paso_Copy(n,u1,v);
146    
147     theta = 0.0;
148     eta = 0.0;
149    
150     tau = Paso_l2(n,r,A->mpi_info);
151    
152     rho = tau * tau;
153     int m=1;
154     int j=0;
155    
156     norm_of_residual=tau*sqrt ( m + 1 );
157    
158     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
159     {
160    
161    
162     sigma=Paso_InnerProduct(n,r,v,A->mpi_info);
163    
164     if (! (breakFlag = (ABS(sigma) == 0.))) {
165    
166     alpha = rho / sigma;
167    
168     for (j=0; j<=1; j=j+1)
169     {
170     /* Compute y2 and u2 only if you have to */
171     if ( j == 1 ){
172     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
173    
174     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
175     Performance_startMonitor(pp,PERFORMANCE_MVM);
176     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2);
177     Performance_stopMonitor(pp,PERFORMANCE_MVM);
178     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
179    
180     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
181     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
182     Paso_Solver_solvePreconditioner(A,u2,u2);
183     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
184     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
185     }
186     m = 2 * (num_iter+1) - 2 + (j+1);
187     if (j==0) {
188     Paso_Update(n,1.,w,-alpha,u1);
189     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
190     }
191     if (j==1) {
192     Paso_Update(n,1.,w,-alpha,u2);
193     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
194     }
195    
196     theta =Paso_l2(n,w,A->mpi_info)/tau;
197     c = 1.0 / sqrt ( 1.0 + theta * theta );
198     tau = tau * theta * c;
199     eta = c * c * alpha;
200     Paso_Update(n,1.,x,eta,d);
201     }
202    
203     breakFlag = (ABS(rho) == 0);
204    
205     rhon = Paso_InnerProduct(n,r,w,A->mpi_info);
206     beta = rhon / rho;
207     rho = rhon;
208    
209     Paso_LinearCombination(n,y1,1.,w,beta,y2);
210    
211     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
212     Performance_startMonitor(pp,PERFORMANCE_MVM);
213     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1);
214     Performance_stopMonitor(pp,PERFORMANCE_MVM);
215     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
216    
217     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
218     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
219     Paso_Solver_solvePreconditioner(A,u1,u1);
220     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
221     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
222    
223     Paso_Copy(n,v_old,v);
224    
225     Paso_Update(n,beta,v_old,1,u2);
226     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
227     }
228     maxIterFlag = (num_iter > maxit);
229     norm_of_residual=tau*sqrt ( m + 1 );
230     convergeFlag=(norm_of_residual<(*tolerance));
231    
232    
233     if (maxIterFlag) {
234     status = SOLVER_MAXITER_REACHED;
235     } else if (breakFlag) {
236     status = SOLVER_BREAKDOWN;
237     }
238     ++(num_iter);
239     }
240     /* end of iteration */
241    
242     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
243     TMPMEMFREE(u1);
244     TMPMEMFREE(u2);
245     TMPMEMFREE(y1);
246     TMPMEMFREE(y2);
247     TMPMEMFREE(d);
248     TMPMEMFREE(w);
249     TMPMEMFREE(v);
250     TMPMEMFREE(v_old);
251    
252     *iter=num_iter;
253     *tolerance=norm_of_residual;
254    
255     /* End of TFQMR */
256     return status;
257     }

  ViewVC Help
Powered by ViewVC 1.1.26