/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3981 - (hide annotations)
Fri Sep 21 02:47:54 2012 UTC (7 years, 6 months ago) by jfenwick
File MIME type: text/plain
File size: 7657 byte(s)
First pass of updating copyright notices
1 artak 1703
2 jfenwick 3981 /*****************************************************************************
3 ksteube 1811 *
4 jfenwick 3911 * Copyright (c) 2003-2012 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 ksteube 1811 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 artak 1703
16 ksteube 1811
17 artak 1703 /* TFQMR iterations */
18    
19     #include "SystemMatrix.h"
20     #include "Paso.h"
21     #include "Solver.h"
22     #include "PasoUtil.h"
23    
24     #ifdef _OPENMP
25     #include <omp.h>
26     #endif
27    
28 jfenwick 3259 #ifdef ESYS_MPI
29 artak 1703 #include <mpi.h>
30     #endif
31    
32     /*
33     *
34     * Purpose
35     * =======
36     *
37     * TFQMR solves the linear system A*x = b
38     *
39     * Convergence test: norm( b - A*x )< TOL.
40     * For other measures, see the above reference.
41     *
42     * Arguments
43     * =========
44     *
45     * r (input) DOUBLE PRECISION array, dimension N.
46 artak 2557 *
47 artak 1703 *
48     * x (input/output) DOUBLE PRECISION array, dimension N.
49 artak 2557 *
50 artak 1703 *
51     * ITER (input/output) INT
52     * On input, the maximum iterations to be performed.
53     * On output, actual number of iterations performed.
54     *
55     * INFO (output) INT
56     *
57     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
58 caltinay 3642 * = SOLVER_MAXITER_REACHED
59 artak 1703 * = SOLVER_INPUT_ERROR Illegal parameter:
60 caltinay 3642 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
61     * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
62 artak 1703 *
63     * ==============================================================
64     */
65    
66     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
67    
68     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
69     #define USE_DYNAMIC_SCHEDULING
70     #endif
71    
72     err_t Paso_Solver_TFQMR(
73     Paso_SystemMatrix * A,
74     double * r,
75     double * x,
76     dim_t *iter,
77     double * tolerance,
78     Paso_Performance* pp) {
79    
80     /* Local variables */
81 artak 1706
82     int m=1;
83     int j=0;
84    
85 artak 1703 dim_t num_iter=0,maxit;
86     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
87     err_t status = SOLVER_NO_ERROR;
88     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
89 artak 2562 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL;
90 artak 1703
91     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
92    
93     double norm_of_residual;
94 artak 1706
95 artak 1703 /* */
96     /*-----------------------------------------------------------------*/
97     /* */
98     /* Start of Calculation : */
99     /* --------------------- */
100     /* */
101     /* */
102     u1=TMPMEMALLOC(n,double);
103     u2=TMPMEMALLOC(n,double);
104     y1=TMPMEMALLOC(n,double);
105     y2=TMPMEMALLOC(n,double);
106     d=TMPMEMALLOC(n,double);
107     w=TMPMEMALLOC(n,double);
108     v=TMPMEMALLOC(n,double);
109     v_old=TMPMEMALLOC(n,double);
110 artak 2562 temp_vector=TMPMEMALLOC(n,double);
111     res=TMPMEMALLOC(n,double);
112 artak 1703
113     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
114     status=SOLVER_MEMORY_ERROR;
115     }
116    
117     maxit = *iter;
118    
119     /* Test the input parameters. */
120     if (n < 0 || maxit<=0 ) {
121     status=SOLVER_INPUT_ERROR;
122     }
123    
124 artak 2826 Paso_zeroes(n,x);
125    
126 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
127 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,res,r);
128 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
129    
130     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
131    
132     Paso_zeroes(n,u2);
133     Paso_zeroes(n,y2);
134    
135 artak 2562 Paso_Copy(n,w,res);
136     Paso_Copy(n,y1,res);
137 artak 1703
138     Paso_zeroes(n,d);
139    
140     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
141     Performance_startMonitor(pp,PERFORMANCE_MVM);
142 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
143 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
144     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
145    
146     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
147     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
148 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
149 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
150     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
151    
152     Paso_Copy(n,u1,v);
153    
154     theta = 0.0;
155     eta = 0.0;
156    
157 artak 2562 tau = Paso_l2(n,res,A->mpi_info);
158 artak 1703
159     rho = tau * tau;
160 artak 1706
161 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
162    
163     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
164     {
165    
166    
167 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
168 artak 1703
169     if (! (breakFlag = (ABS(sigma) == 0.))) {
170    
171     alpha = rho / sigma;
172    
173     for (j=0; j<=1; j=j+1)
174     {
175     /* Compute y2 and u2 only if you have to */
176     if ( j == 1 ){
177     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
178    
179     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180     Performance_startMonitor(pp,PERFORMANCE_MVM);
181 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
182 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
183     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184    
185     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
186     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
187 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
188 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
189     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
190     }
191     m = 2 * (num_iter+1) - 2 + (j+1);
192     if (j==0) {
193     Paso_Update(n,1.,w,-alpha,u1);
194     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
195     }
196     if (j==1) {
197     Paso_Update(n,1.,w,-alpha,u2);
198     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
199     }
200    
201     theta =Paso_l2(n,w,A->mpi_info)/tau;
202     c = 1.0 / sqrt ( 1.0 + theta * theta );
203     tau = tau * theta * c;
204     eta = c * c * alpha;
205     Paso_Update(n,1.,x,eta,d);
206     }
207    
208     breakFlag = (ABS(rho) == 0);
209    
210 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
211 artak 1703 beta = rhon / rho;
212     rho = rhon;
213    
214     Paso_LinearCombination(n,y1,1.,w,beta,y2);
215    
216     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
217 artak 1711
218 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM);
219 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
220 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
221    
222     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
223 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
224 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
225 artak 1711
226 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
227    
228     Paso_Copy(n,v_old,v);
229    
230     Paso_Update(n,beta,v_old,1,u2);
231     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
232     }
233     maxIterFlag = (num_iter > maxit);
234     norm_of_residual=tau*sqrt ( m + 1 );
235     convergeFlag=(norm_of_residual<(*tolerance));
236    
237    
238     if (maxIterFlag) {
239     status = SOLVER_MAXITER_REACHED;
240     } else if (breakFlag) {
241     status = SOLVER_BREAKDOWN;
242     }
243     ++(num_iter);
244     }
245     /* end of iteration */
246    
247     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
248     TMPMEMFREE(u1);
249     TMPMEMFREE(u2);
250     TMPMEMFREE(y1);
251     TMPMEMFREE(y2);
252     TMPMEMFREE(d);
253     TMPMEMFREE(w);
254     TMPMEMFREE(v);
255     TMPMEMFREE(v_old);
256 artak 2562 TMPMEMFREE(temp_vector);
257     TMPMEMFREE(res);
258 artak 1703 *iter=num_iter;
259     *tolerance=norm_of_residual;
260    
261     /* End of TFQMR */
262     return status;
263 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26