/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2826 - (hide annotations)
Fri Dec 18 01:33:35 2009 UTC (10 years, 2 months ago) by artak
File MIME type: text/plain
File size: 7521 byte(s)
MINRES anf TFQMR add initioal guess. Tolerance for both solvers changed. However tolerance computaion needs to be more involved than it is now to avoid unnecessary very small tolerances.
1 artak 1703
2     /*******************************************************
3 ksteube 1811 *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 artak 1703
14 ksteube 1811
15 artak 1703 /* TFQMR iterations */
16    
17     #include "SystemMatrix.h"
18     #include "Paso.h"
19     #include "Solver.h"
20     #include "PasoUtil.h"
21    
22     #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25    
26     #ifdef PASO_MPI
27     #include <mpi.h>
28     #endif
29    
30     /*
31     *
32     * Purpose
33     * =======
34     *
35     * TFQMR solves the linear system A*x = b
36     *
37     * Convergence test: norm( b - A*x )< TOL.
38     * For other measures, see the above reference.
39     *
40     * Arguments
41     * =========
42     *
43     * r (input) DOUBLE PRECISION array, dimension N.
44 artak 2557 *
45 artak 1703 *
46     * x (input/output) DOUBLE PRECISION array, dimension N.
47 artak 2557 *
48 artak 1703 *
49     * ITER (input/output) INT
50     * On input, the maximum iterations to be performed.
51     * On output, actual number of iterations performed.
52     *
53     * INFO (output) INT
54     *
55     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
56     * = SOLVEr_MAXITER_REACHED
57     * = SOLVER_INPUT_ERROR Illegal parameter:
58     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
59     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
60     *
61     * ==============================================================
62     */
63    
64     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
65    
66     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
67     #define USE_DYNAMIC_SCHEDULING
68     #endif
69    
70     err_t Paso_Solver_TFQMR(
71     Paso_SystemMatrix * A,
72     double * r,
73     double * x,
74     dim_t *iter,
75     double * tolerance,
76     Paso_Performance* pp) {
77    
78     /* Local variables */
79 artak 1706
80     int m=1;
81     int j=0;
82    
83 artak 1703 dim_t num_iter=0,maxit;
84     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
85     err_t status = SOLVER_NO_ERROR;
86     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
87 artak 2562 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL;
88 artak 1703
89     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
90    
91     double norm_of_residual;
92 artak 1706
93 artak 1703 /* */
94     /*-----------------------------------------------------------------*/
95     /* */
96     /* Start of Calculation : */
97     /* --------------------- */
98     /* */
99     /* */
100     u1=TMPMEMALLOC(n,double);
101     u2=TMPMEMALLOC(n,double);
102     y1=TMPMEMALLOC(n,double);
103     y2=TMPMEMALLOC(n,double);
104     d=TMPMEMALLOC(n,double);
105     w=TMPMEMALLOC(n,double);
106     v=TMPMEMALLOC(n,double);
107     v_old=TMPMEMALLOC(n,double);
108 artak 2562 temp_vector=TMPMEMALLOC(n,double);
109     res=TMPMEMALLOC(n,double);
110 artak 1703
111     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
112     status=SOLVER_MEMORY_ERROR;
113     }
114    
115     maxit = *iter;
116    
117     /* Test the input parameters. */
118     if (n < 0 || maxit<=0 ) {
119     status=SOLVER_INPUT_ERROR;
120     }
121    
122 artak 2826 Paso_zeroes(n,x);
123    
124 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
125 artak 2562 Paso_Solver_solvePreconditioner(A,res,r);
126 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
127    
128     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
129    
130     Paso_zeroes(n,u2);
131     Paso_zeroes(n,y2);
132    
133 artak 2562 Paso_Copy(n,w,res);
134     Paso_Copy(n,y1,res);
135 artak 1703
136     Paso_zeroes(n,d);
137    
138     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
139     Performance_startMonitor(pp,PERFORMANCE_MVM);
140 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
141 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
142     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
143    
144     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
145     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
146 artak 2562 Paso_Solver_solvePreconditioner(A,v,temp_vector);
147 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
148     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
149    
150     Paso_Copy(n,u1,v);
151    
152     theta = 0.0;
153     eta = 0.0;
154    
155 artak 2562 tau = Paso_l2(n,res,A->mpi_info);
156 artak 1703
157     rho = tau * tau;
158 artak 1706
159 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
160    
161     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
162     {
163    
164    
165 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
166 artak 1703
167     if (! (breakFlag = (ABS(sigma) == 0.))) {
168    
169     alpha = rho / sigma;
170    
171     for (j=0; j<=1; j=j+1)
172     {
173     /* Compute y2 and u2 only if you have to */
174     if ( j == 1 ){
175     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
176    
177     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
178     Performance_startMonitor(pp,PERFORMANCE_MVM);
179 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
180 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
181     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
182    
183     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
184     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
185 artak 2562 Paso_Solver_solvePreconditioner(A,u2,temp_vector);
186 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
187     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
188     }
189     m = 2 * (num_iter+1) - 2 + (j+1);
190     if (j==0) {
191     Paso_Update(n,1.,w,-alpha,u1);
192     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
193     }
194     if (j==1) {
195     Paso_Update(n,1.,w,-alpha,u2);
196     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
197     }
198    
199     theta =Paso_l2(n,w,A->mpi_info)/tau;
200     c = 1.0 / sqrt ( 1.0 + theta * theta );
201     tau = tau * theta * c;
202     eta = c * c * alpha;
203     Paso_Update(n,1.,x,eta,d);
204     }
205    
206     breakFlag = (ABS(rho) == 0);
207    
208 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
209 artak 1703 beta = rhon / rho;
210     rho = rhon;
211    
212     Paso_LinearCombination(n,y1,1.,w,beta,y2);
213    
214     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
215 artak 1711
216 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM);
217 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
218 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
219    
220     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
221 artak 2562 Paso_Solver_solvePreconditioner(A,u1,temp_vector);
222 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
223 artak 1711
224 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
225    
226     Paso_Copy(n,v_old,v);
227    
228     Paso_Update(n,beta,v_old,1,u2);
229     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
230     }
231     maxIterFlag = (num_iter > maxit);
232     norm_of_residual=tau*sqrt ( m + 1 );
233     convergeFlag=(norm_of_residual<(*tolerance));
234    
235    
236     if (maxIterFlag) {
237     status = SOLVER_MAXITER_REACHED;
238     } else if (breakFlag) {
239     status = SOLVER_BREAKDOWN;
240     }
241     ++(num_iter);
242     }
243     /* end of iteration */
244    
245     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
246     TMPMEMFREE(u1);
247     TMPMEMFREE(u2);
248     TMPMEMFREE(y1);
249     TMPMEMFREE(y2);
250     TMPMEMFREE(d);
251     TMPMEMFREE(w);
252     TMPMEMFREE(v);
253     TMPMEMFREE(v_old);
254 artak 2562 TMPMEMFREE(temp_vector);
255     TMPMEMFREE(res);
256 artak 1703 *iter=num_iter;
257     *tolerance=norm_of_residual;
258    
259     /* End of TFQMR */
260     return status;
261 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26