/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Annotation of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1708 - (hide annotations)
Thu Aug 14 22:42:24 2008 UTC (11 years, 4 months ago) by ksteube
File MIME type: text/plain
File size: 7493 byte(s)
Added newline at end of file TFQMR.c because it is an error for icc.
Turned useumfpack off by default.

1 artak 1703
2     /*******************************************************
3     *
4     * Copyright 2003-2007 by ACceSS MNRF
5     * Copyright 2007 by University of Queensland
6     *
7     * http://esscc.uq.edu.au
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /* TFQMR iterations */
15    
16     #include "SystemMatrix.h"
17     #include "Paso.h"
18     #include "Solver.h"
19     #include "PasoUtil.h"
20    
21     #ifdef _OPENMP
22     #include <omp.h>
23     #endif
24    
25     #ifdef PASO_MPI
26     #include <mpi.h>
27     #endif
28    
29     /*
30     *
31     * Purpose
32     * =======
33     *
34     * TFQMR solves the linear system A*x = b
35     *
36     * Convergence test: norm( b - A*x )< TOL.
37     * For other measures, see the above reference.
38     *
39     * Arguments
40     * =========
41     *
42     * r (input) DOUBLE PRECISION array, dimension N.
43     * On entry, residual of inital guess x
44     *
45     * x (input/output) DOUBLE PRECISION array, dimension N.
46     * On input, the initial guess.
47     *
48     * ITER (input/output) INT
49     * On input, the maximum iterations to be performed.
50     * On output, actual number of iterations performed.
51     *
52     * INFO (output) INT
53     *
54     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
55     * = SOLVEr_MAXITER_REACHED
56     * = SOLVER_INPUT_ERROR Illegal parameter:
57     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
58     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
59     *
60     * ==============================================================
61     */
62    
63     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
64    
65     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
66     #define USE_DYNAMIC_SCHEDULING
67     #endif
68    
69     err_t Paso_Solver_TFQMR(
70     Paso_SystemMatrix * A,
71     double * r,
72     double * x,
73     dim_t *iter,
74     double * tolerance,
75     Paso_Performance* pp) {
76    
77     /* Local variables */
78 artak 1706
79     int m=1;
80     int j=0;
81    
82 artak 1703 dim_t num_iter=0,maxit;
83     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
84     err_t status = SOLVER_NO_ERROR;
85     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
86     double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ;
87    
88     double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
89    
90     double norm_of_residual;
91 artak 1706
92 artak 1703 /* */
93     /*-----------------------------------------------------------------*/
94     /* */
95     /* Start of Calculation : */
96     /* --------------------- */
97     /* */
98     /* */
99     u1=TMPMEMALLOC(n,double);
100     u2=TMPMEMALLOC(n,double);
101     y1=TMPMEMALLOC(n,double);
102     y2=TMPMEMALLOC(n,double);
103     d=TMPMEMALLOC(n,double);
104     w=TMPMEMALLOC(n,double);
105     v=TMPMEMALLOC(n,double);
106     v_old=TMPMEMALLOC(n,double);
107    
108    
109     tmp=TMPMEMALLOC(n,double);
110    
111    
112     if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
113     status=SOLVER_MEMORY_ERROR;
114     }
115    
116     maxit = *iter;
117    
118     /* Test the input parameters. */
119     if (n < 0 || maxit<=0 ) {
120     status=SOLVER_INPUT_ERROR;
121     }
122    
123     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
124     Paso_Solver_solvePreconditioner(A,r,r);
125     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
126    
127     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
128    
129     Paso_zeroes(n,u2);
130     Paso_zeroes(n,y2);
131    
132     Paso_Copy(n,w,r);
133     Paso_Copy(n,y1,r);
134    
135     Paso_zeroes(n,d);
136    
137     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
138     Performance_startMonitor(pp,PERFORMANCE_MVM);
139     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v);
140     Performance_stopMonitor(pp,PERFORMANCE_MVM);
141     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
142    
143     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
144     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
145     Paso_Solver_solvePreconditioner(A,v,v);
146     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
147     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
148    
149     Paso_Copy(n,u1,v);
150    
151     theta = 0.0;
152     eta = 0.0;
153    
154     tau = Paso_l2(n,r,A->mpi_info);
155    
156     rho = tau * tau;
157 artak 1706
158 artak 1703 norm_of_residual=tau*sqrt ( m + 1 );
159    
160     while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
161     {
162    
163    
164     sigma=Paso_InnerProduct(n,r,v,A->mpi_info);
165    
166     if (! (breakFlag = (ABS(sigma) == 0.))) {
167    
168     alpha = rho / sigma;
169    
170     for (j=0; j<=1; j=j+1)
171     {
172     /* Compute y2 and u2 only if you have to */
173     if ( j == 1 ){
174     Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
175    
176     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
177     Performance_startMonitor(pp,PERFORMANCE_MVM);
178     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2);
179     Performance_stopMonitor(pp,PERFORMANCE_MVM);
180     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
181    
182     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
183     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
184     Paso_Solver_solvePreconditioner(A,u2,u2);
185     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
186     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
187     }
188     m = 2 * (num_iter+1) - 2 + (j+1);
189     if (j==0) {
190     Paso_Update(n,1.,w,-alpha,u1);
191     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
192     }
193     if (j==1) {
194     Paso_Update(n,1.,w,-alpha,u2);
195     Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
196     }
197    
198     theta =Paso_l2(n,w,A->mpi_info)/tau;
199     c = 1.0 / sqrt ( 1.0 + theta * theta );
200     tau = tau * theta * c;
201     eta = c * c * alpha;
202     Paso_Update(n,1.,x,eta,d);
203     }
204    
205     breakFlag = (ABS(rho) == 0);
206    
207     rhon = Paso_InnerProduct(n,r,w,A->mpi_info);
208     beta = rhon / rho;
209     rho = rhon;
210    
211     Paso_LinearCombination(n,y1,1.,w,beta,y2);
212    
213     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
214     Performance_startMonitor(pp,PERFORMANCE_MVM);
215     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1);
216     Performance_stopMonitor(pp,PERFORMANCE_MVM);
217     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
218    
219     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
220     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
221     Paso_Solver_solvePreconditioner(A,u1,u1);
222     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
223     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
224    
225     Paso_Copy(n,v_old,v);
226    
227     Paso_Update(n,beta,v_old,1,u2);
228     Paso_LinearCombination(n,v,1.,u1,beta,v_old);
229     }
230     maxIterFlag = (num_iter > maxit);
231     norm_of_residual=tau*sqrt ( m + 1 );
232     convergeFlag=(norm_of_residual<(*tolerance));
233    
234    
235     if (maxIterFlag) {
236     status = SOLVER_MAXITER_REACHED;
237     } else if (breakFlag) {
238     status = SOLVER_BREAKDOWN;
239     }
240     ++(num_iter);
241     }
242     /* end of iteration */
243    
244     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
245     TMPMEMFREE(u1);
246     TMPMEMFREE(u2);
247     TMPMEMFREE(y1);
248     TMPMEMFREE(y2);
249     TMPMEMFREE(d);
250     TMPMEMFREE(w);
251     TMPMEMFREE(v);
252     TMPMEMFREE(v_old);
253    
254     *iter=num_iter;
255     *tolerance=norm_of_residual;
256    
257     /* End of TFQMR */
258     return status;
259 ksteube 1708 }

  ViewVC Help
Powered by ViewVC 1.1.26