/[escript]/branches/doubleplusgood/paso/src/TFQMR.cpp
ViewVC logotype

Annotation of /branches/doubleplusgood/paso/src/TFQMR.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1778 - (hide annotations)
Tue Sep 9 07:46:02 2008 UTC (11 years, 4 months ago) by gross
Original Path: trunk/paso/src/TFQMR.c
File MIME type: text/plain
File size: 7351 byte(s)
memry leak fixed
1 artak 1703
2     /*******************************************************
3     *
4     * Copyright 2003-2007 by ACceSS MNRF
5     * Copyright 2007 by University of Queensland
6     *
7     * http://esscc.uq.edu.au
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /* TFQMR iterations */
15    
16     #include "SystemMatrix.h"
17     #include "Paso.h"
18     #include "Solver.h"
19     #include "PasoUtil.h"
20    
21     #ifdef _OPENMP
22     #include <omp.h>
23     #endif
24    
25     #ifdef PASO_MPI
26     #include <mpi.h>
27     #endif
28    
29     /*
30     *
31     * Purpose
32     * =======
33     *
34     * TFQMR solves the linear system A*x = b
35     *
36     * Convergence test: norm( b - A*x )< TOL.
37     * For other measures, see the above reference.
38     *
39     * Arguments
40     * =========
41     *
42     * r (input) DOUBLE PRECISION array, dimension N.
43     * On entry, residual of inital guess x
44     *
45     * x (input/output) DOUBLE PRECISION array, dimension N.
46     * On input, the initial guess.
47     *
48     * ITER (input/output) INT
49     * On input, the maximum iterations to be performed.
50     * On output, actual number of iterations performed.
51     *
52     * INFO (output) INT
53     *
54     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
55     * = SOLVEr_MAXITER_REACHED
56     * = SOLVER_INPUT_ERROR Illegal parameter:
57     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
58     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
59     *
60     * ==============================================================
61     */
62    
63     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
64    
65     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
66     #define USE_DYNAMIC_SCHEDULING
67     #endif
68    
69     err_t Paso_Solver_TFQMR(
70     Paso_SystemMatrix * A,
71     double * r,
72     double * x,
73     dim_t *iter,
74     double * tolerance,
75     Paso_Performance* pp) {
76    
77     /* Local variables */
78 artak 1706
79     int m=1;
80     int j=0;
81    
82 artak 1703 dim_t num_iter=0,maxit;
83     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
84     err_t status = SOLVER_NO_ERROR;
85     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
86 gross 1778 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL;
87 artak 1703
88     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
89    
90     double norm_of_residual;
91 artak 1706
92 artak 1703 /* */
93     /*-----------------------------------------------------------------*/
94     /* */
95     /* Start of Calculation : */
96     /* --------------------- */
97     /* */
98     /* */
99     u1=TMPMEMALLOC(n,double);
100     u2=TMPMEMALLOC(n,double);
101     y1=TMPMEMALLOC(n,double);
102     y2=TMPMEMALLOC(n,double);
103     d=TMPMEMALLOC(n,double);
104     w=TMPMEMALLOC(n,double);
105     v=TMPMEMALLOC(n,double);
106     v_old=TMPMEMALLOC(n,double);
107    
108    
109     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
110     status=SOLVER_MEMORY_ERROR;
111     }
112    
113     maxit = *iter;
114    
115     /* Test the input parameters. */
116     if (n < 0 || maxit<=0 ) {
117     status=SOLVER_INPUT_ERROR;
118     }
119    
120     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
121     Paso_Solver_solvePreconditioner(A,r,r);
122     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
123    
124     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
125    
126     Paso_zeroes(n,u2);
127     Paso_zeroes(n,y2);
128    
129     Paso_Copy(n,w,r);
130     Paso_Copy(n,y1,r);
131    
132     Paso_zeroes(n,d);
133    
134     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
135     Performance_startMonitor(pp,PERFORMANCE_MVM);
136     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v);
137     Performance_stopMonitor(pp,PERFORMANCE_MVM);
138     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
139    
140     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
141     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
142     Paso_Solver_solvePreconditioner(A,v,v);
143     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
144     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
145    
146     Paso_Copy(n,u1,v);
147    
148     theta = 0.0;
149     eta = 0.0;
150    
151     tau = Paso_l2(n,r,A->mpi_info);
152    
153     rho = tau * tau;
154 artak 1706
155 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
156    
157     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
158     {
159    
160    
161     sigma=Paso_InnerProduct(n,r,v,A->mpi_info);
162    
163     if (! (breakFlag = (ABS(sigma) == 0.))) {
164    
165     alpha = rho / sigma;
166    
167     for (j=0; j<=1; j=j+1)
168     {
169     /* Compute y2 and u2 only if you have to */
170     if ( j == 1 ){
171     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
172    
173     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
174     Performance_startMonitor(pp,PERFORMANCE_MVM);
175     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2);
176     Performance_stopMonitor(pp,PERFORMANCE_MVM);
177     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
178    
179     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
181     Paso_Solver_solvePreconditioner(A,u2,u2);
182     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
183     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184     }
185     m = 2 * (num_iter+1) - 2 + (j+1);
186     if (j==0) {
187     Paso_Update(n,1.,w,-alpha,u1);
188     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
189     }
190     if (j==1) {
191     Paso_Update(n,1.,w,-alpha,u2);
192     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
193     }
194    
195     theta =Paso_l2(n,w,A->mpi_info)/tau;
196     c = 1.0 / sqrt ( 1.0 + theta * theta );
197     tau = tau * theta * c;
198     eta = c * c * alpha;
199     Paso_Update(n,1.,x,eta,d);
200     }
201    
202     breakFlag = (ABS(rho) == 0);
203    
204     rhon = Paso_InnerProduct(n,r,w,A->mpi_info);
205     beta = rhon / rho;
206     rho = rhon;
207    
208     Paso_LinearCombination(n,y1,1.,w,beta,y2);
209    
210     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
211 artak 1711
212 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM);
213     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1);
214     Performance_stopMonitor(pp,PERFORMANCE_MVM);
215    
216     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
217     Paso_Solver_solvePreconditioner(A,u1,u1);
218     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
219 artak 1711
220 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
221    
222     Paso_Copy(n,v_old,v);
223    
224     Paso_Update(n,beta,v_old,1,u2);
225     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
226     }
227     maxIterFlag = (num_iter > maxit);
228     norm_of_residual=tau*sqrt ( m + 1 );
229     convergeFlag=(norm_of_residual<(*tolerance));
230    
231    
232     if (maxIterFlag) {
233     status = SOLVER_MAXITER_REACHED;
234     } else if (breakFlag) {
235     status = SOLVER_BREAKDOWN;
236     }
237     ++(num_iter);
238     }
239     /* end of iteration */
240    
241     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
242     TMPMEMFREE(u1);
243     TMPMEMFREE(u2);
244     TMPMEMFREE(y1);
245     TMPMEMFREE(y2);
246     TMPMEMFREE(d);
247     TMPMEMFREE(w);
248     TMPMEMFREE(v);
249     TMPMEMFREE(v_old);
250    
251     *iter=num_iter;
252     *tolerance=norm_of_residual;
253    
254     /* End of TFQMR */
255     return status;
256 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26