/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1974 - (hide annotations)
Thu Nov 6 02:40:10 2008 UTC (10 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 7358 byte(s)
Changing ZERO and ONE in Solver.h to PASO_ZERO and PASO_ONE.
Changed three static vars to #defines
1 artak 1703
2     /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 artak 1703
14 ksteube 1811
15 artak 1703 /* TFQMR iterations */
16    
17     #include "SystemMatrix.h"
18     #include "Paso.h"
19     #include "Solver.h"
20     #include "PasoUtil.h"
21    
22     #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25    
26     #ifdef PASO_MPI
27     #include <mpi.h>
28     #endif
29    
30     /*
31     *
32     * Purpose
33     * =======
34     *
35     * TFQMR solves the linear system A*x = b
36     *
37     * Convergence test: norm( b - A*x )< TOL.
38     * For other measures, see the above reference.
39     *
40     * Arguments
41     * =========
42     *
43     * r (input) DOUBLE PRECISION array, dimension N.
44     * On entry, residual of inital guess x
45     *
46     * x (input/output) DOUBLE PRECISION array, dimension N.
47     * On input, the initial guess.
48     *
49     * ITER (input/output) INT
50     * On input, the maximum iterations to be performed.
51     * On output, actual number of iterations performed.
52     *
53     * INFO (output) INT
54     *
55     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
56     * = SOLVEr_MAXITER_REACHED
57     * = SOLVER_INPUT_ERROR Illegal parameter:
58     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
59     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
60     *
61     * ==============================================================
62     */
63    
64     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
65    
66     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
67     #define USE_DYNAMIC_SCHEDULING
68     #endif
69    
70     err_t Paso_Solver_TFQMR(
71     Paso_SystemMatrix * A,
72     double * r,
73     double * x,
74     dim_t *iter,
75     double * tolerance,
76     Paso_Performance* pp) {
77    
78     /* Local variables */
79 artak 1706
80     int m=1;
81     int j=0;
82    
83 artak 1703 dim_t num_iter=0,maxit;
84     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
85     err_t status = SOLVER_NO_ERROR;
86     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
87 gross 1778 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL;
88 artak 1703
89     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
90    
91     double norm_of_residual;
92 artak 1706
93 artak 1703 /* */
94     /*-----------------------------------------------------------------*/
95     /* */
96     /* Start of Calculation : */
97     /* --------------------- */
98     /* */
99     /* */
100     u1=TMPMEMALLOC(n,double);
101     u2=TMPMEMALLOC(n,double);
102     y1=TMPMEMALLOC(n,double);
103     y2=TMPMEMALLOC(n,double);
104     d=TMPMEMALLOC(n,double);
105     w=TMPMEMALLOC(n,double);
106     v=TMPMEMALLOC(n,double);
107     v_old=TMPMEMALLOC(n,double);
108    
109    
110     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
111     status=SOLVER_MEMORY_ERROR;
112     }
113    
114     maxit = *iter;
115    
116     /* Test the input parameters. */
117     if (n < 0 || maxit<=0 ) {
118     status=SOLVER_INPUT_ERROR;
119     }
120    
121     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
122     Paso_Solver_solvePreconditioner(A,r,r);
123     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
124    
125     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
126    
127     Paso_zeroes(n,u2);
128     Paso_zeroes(n,y2);
129    
130     Paso_Copy(n,w,r);
131     Paso_Copy(n,y1,r);
132    
133     Paso_zeroes(n,d);
134    
135     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
136     Performance_startMonitor(pp,PERFORMANCE_MVM);
137 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,v);
138 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
139     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
140    
141     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
142     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
143     Paso_Solver_solvePreconditioner(A,v,v);
144     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
145     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
146    
147     Paso_Copy(n,u1,v);
148    
149     theta = 0.0;
150     eta = 0.0;
151    
152     tau = Paso_l2(n,r,A->mpi_info);
153    
154     rho = tau * tau;
155 artak 1706
156 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
157    
158     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
159     {
160    
161    
162     sigma=Paso_InnerProduct(n,r,v,A->mpi_info);
163    
164     if (! (breakFlag = (ABS(sigma) == 0.))) {
165    
166     alpha = rho / sigma;
167    
168     for (j=0; j<=1; j=j+1)
169     {
170     /* Compute y2 and u2 only if you have to */
171     if ( j == 1 ){
172     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
173    
174     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
175     Performance_startMonitor(pp,PERFORMANCE_MVM);
176 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,u2);
177 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
178     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
179    
180     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
181     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
182     Paso_Solver_solvePreconditioner(A,u2,u2);
183     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
184     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
185     }
186     m = 2 * (num_iter+1) - 2 + (j+1);
187     if (j==0) {
188     Paso_Update(n,1.,w,-alpha,u1);
189     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
190     }
191     if (j==1) {
192     Paso_Update(n,1.,w,-alpha,u2);
193     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
194     }
195    
196     theta =Paso_l2(n,w,A->mpi_info)/tau;
197     c = 1.0 / sqrt ( 1.0 + theta * theta );
198     tau = tau * theta * c;
199     eta = c * c * alpha;
200     Paso_Update(n,1.,x,eta,d);
201     }
202    
203     breakFlag = (ABS(rho) == 0);
204    
205     rhon = Paso_InnerProduct(n,r,w,A->mpi_info);
206     beta = rhon / rho;
207     rho = rhon;
208    
209     Paso_LinearCombination(n,y1,1.,w,beta,y2);
210    
211     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
212 artak 1711
213 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM);
214 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,u1);
215 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
216    
217     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
218     Paso_Solver_solvePreconditioner(A,u1,u1);
219     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
220 artak 1711
221 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
222    
223     Paso_Copy(n,v_old,v);
224    
225     Paso_Update(n,beta,v_old,1,u2);
226     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
227     }
228     maxIterFlag = (num_iter > maxit);
229     norm_of_residual=tau*sqrt ( m + 1 );
230     convergeFlag=(norm_of_residual<(*tolerance));
231    
232    
233     if (maxIterFlag) {
234     status = SOLVER_MAXITER_REACHED;
235     } else if (breakFlag) {
236     status = SOLVER_BREAKDOWN;
237     }
238     ++(num_iter);
239     }
240     /* end of iteration */
241    
242     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
243     TMPMEMFREE(u1);
244     TMPMEMFREE(u2);
245     TMPMEMFREE(y1);
246     TMPMEMFREE(y2);
247     TMPMEMFREE(d);
248     TMPMEMFREE(w);
249     TMPMEMFREE(v);
250     TMPMEMFREE(v_old);
251    
252     *iter=num_iter;
253     *tolerance=norm_of_residual;
254    
255     /* End of TFQMR */
256     return status;
257 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26