/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (hide annotations)
Mon Oct 11 01:48:14 2010 UTC (9 years, 4 months ago) by jfenwick
File MIME type: text/plain
File size: 7538 byte(s)
Merging dudley and scons updates from branches

1 artak 1703
2     /*******************************************************
3 ksteube 1811 *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 artak 1703
14 ksteube 1811
15 artak 1703 /* TFQMR iterations */
16    
17     #include "SystemMatrix.h"
18     #include "Paso.h"
19     #include "Solver.h"
20     #include "PasoUtil.h"
21    
22     #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25    
26 jfenwick 3259 #ifdef ESYS_MPI
27 artak 1703 #include <mpi.h>
28     #endif
29    
30     /*
31     *
32     * Purpose
33     * =======
34     *
35     * TFQMR solves the linear system A*x = b
36     *
37     * Convergence test: norm( b - A*x )< TOL.
38     * For other measures, see the above reference.
39     *
40     * Arguments
41     * =========
42     *
43     * r (input) DOUBLE PRECISION array, dimension N.
44 artak 2557 *
45 artak 1703 *
46     * x (input/output) DOUBLE PRECISION array, dimension N.
47 artak 2557 *
48 artak 1703 *
49     * ITER (input/output) INT
50     * On input, the maximum iterations to be performed.
51     * On output, actual number of iterations performed.
52     *
53     * INFO (output) INT
54     *
55     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
56     * = SOLVEr_MAXITER_REACHED
57     * = SOLVER_INPUT_ERROR Illegal parameter:
58     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
59     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
60     *
61     * ==============================================================
62     */
63    
64     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
65    
66     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
67     #define USE_DYNAMIC_SCHEDULING
68     #endif
69    
70     err_t Paso_Solver_TFQMR(
71     Paso_SystemMatrix * A,
72     double * r,
73     double * x,
74     dim_t *iter,
75     double * tolerance,
76     Paso_Performance* pp) {
77    
78     /* Local variables */
79 artak 1706
80     int m=1;
81     int j=0;
82    
83 artak 1703 dim_t num_iter=0,maxit;
84     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
85     err_t status = SOLVER_NO_ERROR;
86     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
87 artak 2562 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL;
88 artak 1703
89     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
90    
91     double norm_of_residual;
92 artak 1706
93 artak 1703 /* */
94     /*-----------------------------------------------------------------*/
95     /* */
96     /* Start of Calculation : */
97     /* --------------------- */
98     /* */
99     /* */
100     u1=TMPMEMALLOC(n,double);
101     u2=TMPMEMALLOC(n,double);
102     y1=TMPMEMALLOC(n,double);
103     y2=TMPMEMALLOC(n,double);
104     d=TMPMEMALLOC(n,double);
105     w=TMPMEMALLOC(n,double);
106     v=TMPMEMALLOC(n,double);
107     v_old=TMPMEMALLOC(n,double);
108 artak 2562 temp_vector=TMPMEMALLOC(n,double);
109     res=TMPMEMALLOC(n,double);
110 artak 1703
111     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
112     status=SOLVER_MEMORY_ERROR;
113     }
114    
115     maxit = *iter;
116    
117     /* Test the input parameters. */
118     if (n < 0 || maxit<=0 ) {
119     status=SOLVER_INPUT_ERROR;
120     }
121    
122 artak 2826 Paso_zeroes(n,x);
123    
124 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
125 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,res,r);
126 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
127    
128     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
129    
130     Paso_zeroes(n,u2);
131     Paso_zeroes(n,y2);
132    
133 artak 2562 Paso_Copy(n,w,res);
134     Paso_Copy(n,y1,res);
135 artak 1703
136     Paso_zeroes(n,d);
137    
138     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
139     Performance_startMonitor(pp,PERFORMANCE_MVM);
140 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
141 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
142     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
143    
144     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
145     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
146 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
147 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
148     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
149    
150     Paso_Copy(n,u1,v);
151    
152     theta = 0.0;
153     eta = 0.0;
154    
155 artak 2562 tau = Paso_l2(n,res,A->mpi_info);
156 artak 1703
157     rho = tau * tau;
158 artak 1706
159 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
160    
161     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
162     {
163    
164    
165 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
166 artak 1703
167     if (! (breakFlag = (ABS(sigma) == 0.))) {
168    
169     alpha = rho / sigma;
170    
171     for (j=0; j<=1; j=j+1)
172     {
173     /* Compute y2 and u2 only if you have to */
174     if ( j == 1 ){
175     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
176    
177     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
178     Performance_startMonitor(pp,PERFORMANCE_MVM);
179 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
180 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
181     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
182    
183     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
184     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
185 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
186 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
187     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
188     }
189     m = 2 * (num_iter+1) - 2 + (j+1);
190     if (j==0) {
191     Paso_Update(n,1.,w,-alpha,u1);
192     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
193     }
194     if (j==1) {
195     Paso_Update(n,1.,w,-alpha,u2);
196     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
197     }
198    
199     theta =Paso_l2(n,w,A->mpi_info)/tau;
200     c = 1.0 / sqrt ( 1.0 + theta * theta );
201     tau = tau * theta * c;
202     eta = c * c * alpha;
203     Paso_Update(n,1.,x,eta,d);
204     }
205    
206     breakFlag = (ABS(rho) == 0);
207    
208 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
209 artak 1703 beta = rhon / rho;
210     rho = rhon;
211    
212     Paso_LinearCombination(n,y1,1.,w,beta,y2);
213    
214     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
215 artak 1711
216 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM);
217 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
218 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM);
219    
220     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
221 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
222 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
223 artak 1711
224 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
225    
226     Paso_Copy(n,v_old,v);
227    
228     Paso_Update(n,beta,v_old,1,u2);
229     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
230     }
231     maxIterFlag = (num_iter > maxit);
232     norm_of_residual=tau*sqrt ( m + 1 );
233     convergeFlag=(norm_of_residual<(*tolerance));
234    
235    
236     if (maxIterFlag) {
237     status = SOLVER_MAXITER_REACHED;
238     } else if (breakFlag) {
239     status = SOLVER_BREAKDOWN;
240     }
241     ++(num_iter);
242     }
243     /* end of iteration */
244    
245     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
246     TMPMEMFREE(u1);
247     TMPMEMFREE(u2);
248     TMPMEMFREE(y1);
249     TMPMEMFREE(y2);
250     TMPMEMFREE(d);
251     TMPMEMFREE(w);
252     TMPMEMFREE(v);
253     TMPMEMFREE(v_old);
254 artak 2562 TMPMEMFREE(temp_vector);
255     TMPMEMFREE(res);
256 artak 1703 *iter=num_iter;
257     *tolerance=norm_of_residual;
258    
259     /* End of TFQMR */
260     return status;
261 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26