/[escript]/branches/diaplayground/cusplibrary/cusp/krylov/lsqr.h
ViewVC logotype

Annotation of /branches/diaplayground/cusplibrary/cusp/krylov/lsqr.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5130 - (hide annotations)
Tue Sep 2 06:09:54 2014 UTC (6 years ago) by caltinay
File MIME type: text/plain
File size: 11251 byte(s)
Implemented transposed SpMV for DIA and CDS on CPU and GPU.
Added LSQR solver, not optimized yet but works in general.

1 caltinay 5130 /*
2     * Copyright 2011 The Regents of the University of California
3     *
4     * Licensed under the Apache License, Version 2.0 (the "License");
5     * you may not use this file except in compliance with the License.
6     * You may obtain a copy of the License at
7     *
8     * http://www.apache.org/licenses/LICENSE-2.0
9     *
10     * Unless required by applicable law or agreed to in writing, software
11     * distributed under the License is distributed on an "AS IS" BASIS,
12     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     * See the License for the specific language governing permissions and
14     * limitations under the License.
15     */
16    
17     #pragma once
18    
19     #include <cusp/detail/config.h>
20    
21     namespace cusp
22     {
23     namespace krylov
24     {
25    
26     /*! \addtogroup iterative_solvers Iterative Solvers
27     * \addtogroup krylov_methods Krylov Methods
28     * \ingroup iterative_solvers
29     * \{
30     */
31     /*!
32     This class is used to specify more detailed parameters to the LSQR solver.
33    
34     The following quantities are used in discussing the structure:
35     \f[ \mathrm{Abar} =
36     \left[ {\begin{array}{cc}
37     A \\
38     \mathrm{damp} \cdot I\\
39     \end{array} } \right]
40     \f]
41    
42     relpr = the relative precision of floating-point arithmetic
43     on the machine being used. On most machines,
44     relpr is about 1.0e-7 and 1.0d-16 in single and double
45     precision respectively.
46     */
47     template <typename Real>
48     struct lsqr_parameters
49     {
50     /*!
51     \param damp The damping parameter for problem 3 above.
52     (damp should be 0.0 for problems 1 and 2.)
53     If the system A*x = b is incompatible, values
54     of damp in the range 0 to sqrt(relpr)*norm(A)
55     will probably have a negligible effect.
56     Larger values of damp will tend to decrease
57     the norm of x and reduce the number of
58     iterations required by LSQR.
59     The work per iteration and the storage needed
60     by LSQR are the same for all values of damp.
61     \param atol An estimate of the relative error in the data
62     defining the matrix A. For example,
63     if A is accurate to about 6 digits, set
64     atol = 1.0e-6. If 0 machine precision will be used.
65     \param btol An estimate of the relative error in the data
66     defining the rhs vector b. For example,
67     if b is accurate to about 6 digits, set
68     btol = 1.0e-6. If 0 machine precision will be used.
69     If both atol and btol are 0 the relative tolerance from the monitor
70     parameter to lsqr will be used instead.
71     \param conlim An upper limit on cond(Abar), the apparent
72     condition number of the matrix Abar.
73     Iterations will be terminated if a computed
74     estimate of cond(Abar) exceeds conlim.
75     This is intended to prevent certain small or
76     zero singular values of A or Abar from
77     coming into effect and causing unwanted growth
78     in the computed solution.
79     conlim and damp may be used separately or
80     together to regularize ill-conditioned systems.
81     Normally, conlim should be in the range
82     1000 to 1/relpr.
83     Suggested value:
84     conlim = 1/(100*relpr) for compatible systems,
85     conlim = 1/(10*sqrt(relpr)) for least squares.
86     If set to 0 then 1/(machine precision) will be used.
87     */
88     lsqr_parameters(Real _damp = 0, Real _atol = 0, Real _btol = 0, Real _conlim = 0):
89     damp(_damp),
90     atol(_atol),
91     btol(_btol),
92     conlim(_conlim)
93     {
94     }
95     Real damp;
96     Real atol;
97     Real btol;
98     Real conlim;
99     };
100    
101     /*! This class holds several results from of an LSQR run
102    
103     The following quantities are used in discussing the structure:
104     \f[ \mathrm{Abar} =
105     \left[ {\begin{array}{cc}
106     A \\
107     \mathrm{damp} \cdot I \\
108     \end{array} } \right]
109     \hspace{1cm}
110     \mathrm{rbar} = \left[ {\begin{array}{cc}
111     b \\
112     0 \\
113     \end{array} } \right] - \mathrm{Abar} \cdot x
114     \f]
115    
116     relpr = the relative precision of floating-point arithmetic
117     on the machine being used. On most machines,
118     relpr is about 1.0e-7 and 1.0d-16 in single and double
119     precision respectively.
120     */
121     template <typename Real>
122     struct lsqr_results
123     {
124     /*! This enumeration describes the reason why the lsqr solver stopped.
125     */
126     enum StopCondition{
127     /*! x = 0 is the exact solution.
128     No iterations were performed. */
129     TrivialSolution=0,
130     /*! The equations A*x = b are probably
131     compatible. Norm(A*x - b) is sufficiently
132     small, given the values of atol and btol. */
133     ExactSolution,
134     /*! damp is zero. The system A*x = b is probably
135     not compatible. A least-squares solution has
136     been obtained that is sufficiently accurate,
137     given the value of atol. */
138     LeastSquaresSolution,
139     /*! damp is nonzero. A damped
140     least-squares solution has been
141     obtained that is sufficiently
142     accurate, given the value of atol.*/
143     DampedLeastSquaresSolution,
144     /*! An estimate of cond(Abar) has exceeded
145     the condition number limit. The system A*x = b appears to
146     be ill-conditioned. */
147     AbarConditionNumberExceeded,
148     /*! The iteration limit was reached */
149     IterationLimitReached
150     };
151     lsqr_results(int _istop, Real _anorm, Real _acond, Real _rnorm,
152     Real _arnorm, Real _xnorm):
153     anorm(_anorm),
154     acond(_acond),
155     rnorm(_rnorm),
156     arnorm(_arnorm),
157     xnorm(_xnorm)
158     {
159     istop = StopCondition(_istop);
160     }
161     /*!
162     The reason why the lsqr solver stopped.
163     \sa StopCondition
164     */
165     StopCondition istop;
166     /*! An estimate of the Frobenius norm of Abar.
167     This is the square-root of the sum of squares
168     of the elements of Abar.
169     If damp is small and if the columns of A
170     have all been scaled to have length 1.0,
171     anorm should increase to roughly sqrt(n).
172     A radically different value for anorm may
173     indicate an error in subroutine aprod (there
174     may be an inconsistency between modes 1 and 2).
175     */
176     Real anorm;
177     /*! An estimate of cond(Abar), the condition
178     number of Abar. A very high value of acond
179     may again indicate a problem in the spmv.
180     */
181     Real acond;
182     /*! An estimate of the final value of
183     norm( Abar(transpose)*rbar ), the norm of
184     the residual for the usual normal equations.
185     This should be small in all cases. (arnorm
186     will often be smaller than the true value
187     computed from the output vector x.)
188     */
189     Real rnorm;
190     /*! An estimate of the final value of
191     norm( Abar(transpose)*rbar ), the norm of
192     the residual for the usual normal equations.
193     This should be small in all cases. (arnorm
194     will often be smaller than the true value
195     computed from the output vector x.)
196     */
197     Real arnorm;
198     /*! An estimate of the norm of the final
199     solution vector x.
200     */
201     Real xnorm;
202     };
203    
204    
205     /*! \p lsqr : LSQR solver
206     *
207     * Solves:
208     * Ax = b
209     * in the case of square A
210     * or
211     * minimize ||Ax - b||^2 (least squares)
212     * for rectangular A when d == 0
213     * or
214     * minimize ||Ax - b||^2 + d^2 ||x||^2 (dampened least squares)
215     * for rectangular A when d > 0
216     *
217     * using the default convergence criteria.
218     */
219     template <class LinearOperator, class Vector, typename RealType>
220     lsqr_results<RealType> lsqr(LinearOperator& A,
221     Vector& x,
222     Vector& b,
223     const lsqr_parameters<RealType> & p);
224    
225     /*! \p lsqr : LSQR solver
226     *
227     * Solves:
228     * Ax = b
229     * for square A when d == 0
230     * or
231     * minimize ||Ax - b||^2 (least squares)
232     * for rectangular A when d == 0
233     * or
234     * minimize ||Ax - b||^2 + d^2 ||x||^2 (dampened least squares)
235     * when d > 0
236     *
237     * \param A matrix of the linear system
238     * \param At transpose of the matrix of the linear system
239     * \param x approximate solution of the linear system
240     * \param b right-hand side of the linear system
241     * \param d the damping factor
242     * \param monitor montiors iteration and determines stopping conditions
243     *
244     * \tparam LinearOperator is a matrix or subclass of \p linear_operator
245     * \tparam Vector vector
246     * \tparam Monitor is a monitor such as \p default_monitor or \p verbose_monitor
247     *
248     * The following code snippet demonstrates how to use \p lsqr to
249     * solve a 10x10 Poisson problem.
250     *
251     * \code
252     * #include <cusp/csr_matrix.h>
253     * #include <cusp/monitor.h>
254     * #include <cusp/krylov/lsqr.h>
255     * #include <cusp/gallery/poisson.h>
256     *
257     * int main(void)
258     * {
259     * // create an empty sparse matrix structure (CSR format)
260     * cusp::csr_matrix<int, float, cusp::device_memory> A;
261     *
262     * // initialize matrix
263     * cusp::gallery::poisson5pt(A, 10, 10);
264     *
265     * // allocate storage for solution (x) and right hand side (b)
266     * cusp::array1d<float, cusp::device_memory> x(A.num_rows, 0);
267     * cusp::array1d<float, cusp::device_memory> b(A.num_rows, 1);
268     *
269     * // set stopping criteria:
270     * // iteration_limit = 100
271     * // relative_tolerance = 1e-6
272     * cusp::verbose_monitor<float> monitor(b, 100, 1e-6);
273     *
274     * // solve the linear system A x = b
275     * // because A is hermitian we can use
276     * // it for its own conjugate transpose
277     * cusp::krylov::lsqr(A, A, x, b, cusp::krylov::lsqr_paramters<float>(), monitor);
278     *
279     * return 0;
280     * }
281     * \endcode
282    
283     * \see \p default_monitor
284     * \see \p verbose_monitor
285     *
286     */
287     template <class LinearOperator,
288     class Vector,
289     typename RealType,
290     class Monitor>
291     lsqr_results<RealType> lsqr(LinearOperator& A,
292     Vector& x,
293     Vector& b,
294     const lsqr_parameters<RealType> & p,
295     Monitor& monitor);
296    
297     /*! \}
298     */
299    
300     } // end namespace krylov
301     } // end namespace cusp
302    
303     #include <cusp/krylov/detail/lsqr.inl>
304    

  ViewVC Help
Powered by ViewVC 1.1.26