/[escript]/trunk/paso/src/MINRES.c
ViewVC logotype

Annotation of /trunk/paso/src/MINRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3120 - (hide annotations)
Mon Aug 30 10:48:00 2010 UTC (9 years, 7 months ago) by gross
File MIME type: text/plain
File size: 7243 byte(s)
first iteration on Paso code clean up
1 artak 1787
2     /*******************************************************
3 ksteube 1811 *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 artak 1787
14 ksteube 1811
15 artak 1979 /* MINRES iterations */
16 artak 1787
17     #include "SystemMatrix.h"
18     #include "Paso.h"
19     #include "Solver.h"
20     #include "PasoUtil.h"
21    
22     #ifdef _OPENMP
23     #include <omp.h>
24     #endif
25    
26     #ifdef PASO_MPI
27     #include <mpi.h>
28     #endif
29    
30     /*
31     *
32     * Purpose
33     * =======
34     *
35     * MINRES solves the linear system A*x = b
36     *
37     * Convergence test: norm( b - A*x )< TOL.
38     * For other measures, see the above reference.
39     *
40     * Arguments
41     * =========
42     *
43     * r (input) DOUBLE PRECISION array, dimension N.
44     * On entry, residual of inital guess x
45     *
46     * x (input/output) DOUBLE PRECISION array, dimension N.
47     * On input, the initial guess.
48     *
49     * ITER (input/output) INT
50     * On input, the maximum iterations to be performed.
51     * On output, actual number of iterations performed.
52     *
53     * INFO (output) INT
54     *
55     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
56     * = SOLVER_MAXITER_REACHED
57     * = SOLVER_INPUT_ERROR Illegal parameter:
58     * = SOLVER_MEMORY_ERROR :
59     * = SOLVER_NEGATIVE_NORM_ERROR :
60     *
61     * ==============================================================
62     */
63    
64     /* #define PASO_DYNAMIC_SCHEDULING_MVM */
65    
66     #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
67     #define USE_DYNAMIC_SCHEDULING
68     #endif
69    
70     err_t Paso_Solver_MINRES(
71     Paso_SystemMatrix * A,
72     double * r,
73     double * x,
74     dim_t *iter,
75     double * tolerance,
76     Paso_Performance* pp) {
77    
78     /* Local variables */
79    
80     dim_t num_iter=0,maxit;
81     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
82     err_t status = SOLVER_NO_ERROR;
83     dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
84     double *w=NULL, *w1=NULL, *w2=NULL, *r1=NULL, *r2=NULL, *y=NULL, *v=NULL;
85    
86 artak 2565 double Anorm,Arnorm,ynorm,oldb,dbar,epsln,phibar,rhs1,rhs2,rnorm,tnorm2,ynorm2,cs,sn,eps,s,alfa,denom,z,beta1,beta;
87     double gmax,gmin,oldeps,delta,gbar,gamma,phi,root,epsx;
88 artak 1787
89 artak 1979 double norm_of_residual=0;
90 artak 1787
91     /* */
92     /*-----------------------------------------------------------------*/
93     /* */
94     /* Start of Calculation : */
95     /* --------------------- */
96     /* */
97     /* */
98     w=TMPMEMALLOC(n,double);
99     w1=TMPMEMALLOC(n,double);
100     w2=TMPMEMALLOC(n,double);
101     r1=TMPMEMALLOC(n,double);
102     r2=TMPMEMALLOC(n,double);
103     y=TMPMEMALLOC(n,double);
104     v=TMPMEMALLOC(n,double);
105    
106    
107     if (w ==NULL || w1== NULL || w2== NULL || r1 == NULL || r2== NULL || y==NULL || v==NULL ) {
108     status=SOLVER_MEMORY_ERROR;
109     }
110    
111     maxit = *iter;
112    
113     /* Test the input parameters. */
114     if (n < 0 || maxit<=0 ) {
115     status=SOLVER_INPUT_ERROR;
116     }
117    
118     Paso_Copy(n,r1,r);
119    
120     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
121 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,y,r1);
122 artak 1787 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
123    
124     beta1=Paso_InnerProduct(n,r1,y,A->mpi_info);
125     if (beta1<0) {
126     status=SOLVER_NEGATIVE_NORM_ERROR;
127     }
128    
129     if (beta1>0) {
130     beta1=sqrt(beta1);
131     }
132    
133     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
134    
135     Paso_zeroes(n,w);
136     Paso_zeroes(n,w2);
137    
138 artak 2826 Paso_zeroes(n,x);
139    
140 artak 1787 Paso_Copy(n,r2,r1);
141    
142     Anorm = 0;
143     ynorm = 0;
144     oldb = 0;
145     beta = beta1;
146     dbar = 0;
147     epsln = 0;
148     phibar = beta1;
149     rhs1 = beta1;
150     rhs2 = 0;
151     rnorm = phibar;
152     tnorm2 = 0;
153     ynorm2 = 0;
154     cs = -1;
155     sn = 0;
156 artak 2565 eps = 0.000001;
157 artak 1787
158 artak 2826 while (!(convergeFlag || (status !=SOLVER_NO_ERROR) ))
159 artak 1787 {
160    
161     s=1/beta;
162     Paso_Update(n, 0., v, s, y);
163    
164     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
165     Performance_startMonitor(pp,PERFORMANCE_MVM);
166 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, v,PASO_ZERO,y);
167 artak 1787 Performance_stopMonitor(pp,PERFORMANCE_MVM);
168     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
169    
170     if (num_iter >= 1) {
171     Paso_Update(n, 1., y, -(beta/oldb), r1);
172     }
173    
174     alfa = Paso_InnerProduct(n,v,y,A->mpi_info);
175 artak 1860 Paso_Update(n, 1., y, (-alfa/beta), r2);
176 artak 1787 Paso_Copy(n,r1,r2);
177     Paso_Copy(n,r2,y);
178    
179     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
181 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,y,r2);
182 artak 1787 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
183     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184    
185     oldb = beta;
186     beta = Paso_InnerProduct(n,y,r2,A->mpi_info);
187     if (beta<0) {
188     status=SOLVER_NEGATIVE_NORM_ERROR;
189     }
190    
191     beta = sqrt( beta );
192     tnorm2 = tnorm2 + alfa*alfa + oldb*oldb + beta*beta;
193    
194     if (num_iter==0) {
195     gmax = ABS(alfa);
196     gmin = gmax;
197     }
198    
199     /* Apply previous rotation Qk-1 to get
200     [deltak epslnk+1] = [cs sn][dbark 0 ]
201     [gbar k dbar k+1] [sn -cs][alfak betak+1]. */
202    
203     oldeps = epsln;
204     delta = cs * dbar + sn * alfa ;
205     gbar = sn * dbar - cs * alfa ;
206     epsln = sn * beta ;
207     dbar = - cs * beta;
208    
209 artak 2565 root = sqrt(gbar*gbar+dbar*dbar) ;
210     Arnorm = phibar*root;
211    
212 artak 1787 gamma = sqrt(gbar*gbar+beta*beta) ;
213     gamma = MAX(gamma,eps) ;
214     cs = gbar / gamma ;
215     sn = beta / gamma ;
216     phi = cs * phibar ;
217     phibar = sn * phibar ;
218    
219     /* Update x. */
220    
221     denom = 1/gamma ;
222     Paso_Copy(n,w1,w2);
223     Paso_Copy(n,w2,w);
224    
225     Paso_LinearCombination(n,w,denom,v,-(denom*oldeps),w1);
226     Paso_Update(n, 1., w, -(delta*denom), w2) ;
227     Paso_Update(n, 1., x, phi, w) ;
228    
229     /* Go round again. */
230    
231     gmax = MAX(gmax,gamma);
232     gmin = MIN(gmin,gamma);
233     z = rhs1 / gamma;
234     ynorm2 = z*z + ynorm2;
235     rhs1 = rhs2 - delta*z;
236     rhs2 = - epsln*z;
237    
238     Anorm = sqrt( tnorm2 ) ;
239     ynorm = sqrt( ynorm2 ) ;
240    
241     rnorm = phibar;
242 artak 2565 epsx = Anorm*ynorm*eps;
243 artak 2826
244    
245     if (status==SOLVER_NO_ERROR) {
246     maxIterFlag = (num_iter > maxit);
247     norm_of_residual=rnorm;
248     convergeFlag=((norm_of_residual/(Anorm*ynorm))<(*tolerance) || 1+(norm_of_residual/(Anorm*ynorm)) <=1);
249     if (maxIterFlag) {
250     status = SOLVER_MAXITER_REACHED;
251     } else if (breakFlag) {
252     status = SOLVER_BREAKDOWN;
253     }
254 artak 1787 }
255     ++(num_iter);
256     }
257     /* end of iteration */
258    
259     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
260     TMPMEMFREE(w);
261     TMPMEMFREE(w1);
262     TMPMEMFREE(w2);
263     TMPMEMFREE(r1);
264     TMPMEMFREE(r2);
265     TMPMEMFREE(y);
266     TMPMEMFREE(v);
267    
268     *iter=num_iter;
269 artak 1862 *tolerance=norm_of_residual;
270 artak 1787
271     /* End of MINRES */
272     return status;
273     }

  ViewVC Help
Powered by ViewVC 1.1.26