/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4154 - (hide annotations)
Tue Jan 22 09:30:23 2013 UTC (7 years, 2 months ago) by jfenwick
File MIME type: text/plain
File size: 8009 byte(s)
Round 1 of copyright fixes
1 artak 1703
2 jfenwick 3981 /*****************************************************************************
3 ksteube 1811 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 ksteube 1811 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 artak 1703
16 ksteube 1811
17 artak 1703 /* TFQMR iterations */
18    
19     #include "SystemMatrix.h"
20     #include "Paso.h"
21     #include "Solver.h"
22     #include "PasoUtil.h"
23    
24     #ifdef _OPENMP
25     #include <omp.h>
26     #endif
27    
28 jfenwick 3259 #ifdef ESYS_MPI
29 artak 1703 #include <mpi.h>
30     #endif
31    
32     /*
33     *
34     * Purpose
35     * =======
36     *
37     * TFQMR solves the linear system A*x = b
38     *
39     * Convergence test: norm( b - A*x )< TOL.
40     * For other measures, see the above reference.
41     *
42     * Arguments
43     * =========
44     *
45     * r (input) DOUBLE PRECISION array, dimension N.
46 artak 2557 *
47 artak 1703 *
48     * x (input/output) DOUBLE PRECISION array, dimension N.
49 artak 2557 *
50 artak 1703 *
51     * ITER (input/output) INT
52     * On input, the maximum iterations to be performed.
53     * On output, actual number of iterations performed.
54     *
55     * INFO (output) INT
56     *
57     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
58 caltinay 3642 * = SOLVER_MAXITER_REACHED
59 artak 1703 * = SOLVER_INPUT_ERROR Illegal parameter:
60 caltinay 3642 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
61     * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
62 artak 1703 *
63     * ==============================================================
64     */
65    
66     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
67    
68     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
69     #define USE_DYNAMIC_SCHEDULING
70     #endif
71    
72     err_t Paso_Solver_TFQMR(
73     Paso_SystemMatrix * A,
74     double * r,
75     double * x,
76     dim_t *iter,
77     double * tolerance,
78     Paso_Performance* pp) {
79    
80     /* Local variables */
81 artak 1706
82     int m=1;
83     int j=0;
84    
85 artak 1703 dim_t num_iter=0,maxit;
86     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
87     err_t status = SOLVER_NO_ERROR;
88     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
89 gross 4093 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *temp_vector=NULL,*res=NULL;
90 artak 1703
91     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
92    
93     double norm_of_residual;
94 artak 1706
95 artak 1703 /* */
96     /*-----------------------------------------------------------------*/
97     /* */
98     /* Start of Calculation : */
99     /* --------------------- */
100     /* */
101     /* */
102     u1=TMPMEMALLOC(n,double);
103     u2=TMPMEMALLOC(n,double);
104     y1=TMPMEMALLOC(n,double);
105     y2=TMPMEMALLOC(n,double);
106     d=TMPMEMALLOC(n,double);
107     w=TMPMEMALLOC(n,double);
108     v=TMPMEMALLOC(n,double);
109 artak 2562 temp_vector=TMPMEMALLOC(n,double);
110     res=TMPMEMALLOC(n,double);
111 artak 1703
112 gross 4093 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL ) {
113 artak 1703 status=SOLVER_MEMORY_ERROR;
114     }
115    
116     maxit = *iter;
117    
118     /* Test the input parameters. */
119     if (n < 0 || maxit<=0 ) {
120     status=SOLVER_INPUT_ERROR;
121     }
122    
123 artak 2826 Paso_zeroes(n,x);
124    
125 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
126 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,res,r);
127 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
128    
129     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
130    
131     Paso_zeroes(n,u2);
132     Paso_zeroes(n,y2);
133    
134 artak 2562 Paso_Copy(n,w,res);
135     Paso_Copy(n,y1,res);
136 artak 1703
137     Paso_zeroes(n,d);
138    
139     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
140     Performance_startMonitor(pp,PERFORMANCE_MVM);
141 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
142 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
143     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
144    
145     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
146     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
147 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
148 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
149     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
150 gross 4093 /* v = P^{-1} * A y1 */
151 artak 1703
152     Paso_Copy(n,u1,v);
153    
154     theta = 0.0;
155     eta = 0.0;
156    
157 artak 2562 tau = Paso_l2(n,res,A->mpi_info);
158 artak 1703
159     rho = tau * tau;
160 artak 1706
161 gross 4093 norm_of_residual=tau;
162 artak 1703
163     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
164     {
165    
166    
167 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
168 artak 1703
169     if (! (breakFlag = (ABS(sigma) == 0.))) {
170    
171     alpha = rho / sigma;
172    
173     for (j=0; j<=1; j=j+1)
174     {
175     /* Compute y2 and u2 only if you have to */
176     if ( j == 1 ){
177 gross 4093 Paso_LinearCombination(n,y2,PASO_ONE,y1,-alpha,v); /* y2 = y1 - alpha*v */
178 artak 1703
179     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180     Performance_startMonitor(pp,PERFORMANCE_MVM);
181 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
182 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
183     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184    
185     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
186     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
187 gross 4093 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
188 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
189     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
190 gross 4093 /* u2 = P^{-1} * A y2 */
191 artak 1703 }
192     m = 2 * (num_iter+1) - 2 + (j+1);
193 gross 4093
194 artak 1703 if (j==0) {
195 gross 4093 Paso_Update(n,1.,w,-alpha,u1); /* w = w - alpha * u1 */
196     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); /* d = ( theta * theta * eta / alpha )*d + y1 */
197 artak 1703 }
198     if (j==1) {
199 gross 4093 Paso_Update(n,1.,w,-alpha,u2); /* w = w - -alpha * u2 */
200     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); /* d = ( theta * theta * eta / alpha )*d + y2 */
201 artak 1703 }
202    
203     theta =Paso_l2(n,w,A->mpi_info)/tau;
204 caltinay 4095 /*printf("tau = %e, %e %e\n",tau, Paso_l2(n,w,A->mpi_info)/tau, theta);*/
205 gross 4093 c = PASO_ONE / sqrt ( PASO_ONE + theta * theta );
206 artak 1703 tau = tau * theta * c;
207     eta = c * c * alpha;
208     Paso_Update(n,1.,x,eta,d);
209     }
210    
211     breakFlag = (ABS(rho) == 0);
212    
213 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
214 artak 1703 beta = rhon / rho;
215     rho = rhon;
216    
217 gross 4093 Paso_LinearCombination(n,y1, PASO_ONE,w,beta,y2); /* y1 = w + beta * y2 */
218 artak 1703
219     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
220     Performance_startMonitor(pp,PERFORMANCE_MVM);
221 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
222 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
223    
224     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
225 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
226 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
227     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
228 gross 4093 /* u1 = P^{-1} * A y1 */
229 artak 1703
230 gross 4093 Paso_LinearCombination(n,temp_vector,PASO_ONE,u2,beta,v); /* t = u2 + beta * v */
231     Paso_LinearCombination(n,v,PASO_ONE,u1,beta,temp_vector); /* v = u1 + beta * t */
232 artak 1703 }
233     maxIterFlag = (num_iter > maxit);
234 gross 4093 norm_of_residual=tau*sqrt ( (double) (m + 1 ) );
235 artak 1703 convergeFlag=(norm_of_residual<(*tolerance));
236    
237    
238     if (maxIterFlag) {
239     status = SOLVER_MAXITER_REACHED;
240     } else if (breakFlag) {
241     status = SOLVER_BREAKDOWN;
242     }
243     ++(num_iter);
244     }
245     /* end of iteration */
246    
247     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
248     TMPMEMFREE(u1);
249     TMPMEMFREE(u2);
250     TMPMEMFREE(y1);
251     TMPMEMFREE(y2);
252     TMPMEMFREE(d);
253     TMPMEMFREE(w);
254     TMPMEMFREE(v);
255 artak 2562 TMPMEMFREE(temp_vector);
256     TMPMEMFREE(res);
257 artak 1703 *iter=num_iter;
258     *tolerance=norm_of_residual;
259    
260     /* End of TFQMR */
261     return status;
262 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26