/[escript]/branches/diaplayground/cusplibrary/cusp/krylov/lsqr.h
ViewVC logotype

Contents of /branches/diaplayground/cusplibrary/cusp/krylov/lsqr.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5130 - (show annotations)
Tue Sep 2 06:09:54 2014 UTC (5 years, 11 months ago) by caltinay
File MIME type: text/plain
File size: 11251 byte(s)
Implemented transposed SpMV for DIA and CDS on CPU and GPU.
Added LSQR solver, not optimized yet but works in general.

1 /*
2 * Copyright 2011 The Regents of the University of California
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #pragma once
18
19 #include <cusp/detail/config.h>
20
21 namespace cusp
22 {
23 namespace krylov
24 {
25
26 /*! \addtogroup iterative_solvers Iterative Solvers
27 * \addtogroup krylov_methods Krylov Methods
28 * \ingroup iterative_solvers
29 * \{
30 */
31 /*!
32 This class is used to specify more detailed parameters to the LSQR solver.
33
34 The following quantities are used in discussing the structure:
35 \f[ \mathrm{Abar} =
36 \left[ {\begin{array}{cc}
37 A \\
38 \mathrm{damp} \cdot I\\
39 \end{array} } \right]
40 \f]
41
42 relpr = the relative precision of floating-point arithmetic
43 on the machine being used. On most machines,
44 relpr is about 1.0e-7 and 1.0d-16 in single and double
45 precision respectively.
46 */
47 template <typename Real>
48 struct lsqr_parameters
49 {
50 /*!
51 \param damp The damping parameter for problem 3 above.
52 (damp should be 0.0 for problems 1 and 2.)
53 If the system A*x = b is incompatible, values
54 of damp in the range 0 to sqrt(relpr)*norm(A)
55 will probably have a negligible effect.
56 Larger values of damp will tend to decrease
57 the norm of x and reduce the number of
58 iterations required by LSQR.
59 The work per iteration and the storage needed
60 by LSQR are the same for all values of damp.
61 \param atol An estimate of the relative error in the data
62 defining the matrix A. For example,
63 if A is accurate to about 6 digits, set
64 atol = 1.0e-6. If 0 machine precision will be used.
65 \param btol An estimate of the relative error in the data
66 defining the rhs vector b. For example,
67 if b is accurate to about 6 digits, set
68 btol = 1.0e-6. If 0 machine precision will be used.
69 If both atol and btol are 0 the relative tolerance from the monitor
70 parameter to lsqr will be used instead.
71 \param conlim An upper limit on cond(Abar), the apparent
72 condition number of the matrix Abar.
73 Iterations will be terminated if a computed
74 estimate of cond(Abar) exceeds conlim.
75 This is intended to prevent certain small or
76 zero singular values of A or Abar from
77 coming into effect and causing unwanted growth
78 in the computed solution.
79 conlim and damp may be used separately or
80 together to regularize ill-conditioned systems.
81 Normally, conlim should be in the range
82 1000 to 1/relpr.
83 Suggested value:
84 conlim = 1/(100*relpr) for compatible systems,
85 conlim = 1/(10*sqrt(relpr)) for least squares.
86 If set to 0 then 1/(machine precision) will be used.
87 */
88 lsqr_parameters(Real _damp = 0, Real _atol = 0, Real _btol = 0, Real _conlim = 0):
89 damp(_damp),
90 atol(_atol),
91 btol(_btol),
92 conlim(_conlim)
93 {
94 }
95 Real damp;
96 Real atol;
97 Real btol;
98 Real conlim;
99 };
100
101 /*! This class holds several results from of an LSQR run
102
103 The following quantities are used in discussing the structure:
104 \f[ \mathrm{Abar} =
105 \left[ {\begin{array}{cc}
106 A \\
107 \mathrm{damp} \cdot I \\
108 \end{array} } \right]
109 \hspace{1cm}
110 \mathrm{rbar} = \left[ {\begin{array}{cc}
111 b \\
112 0 \\
113 \end{array} } \right] - \mathrm{Abar} \cdot x
114 \f]
115
116 relpr = the relative precision of floating-point arithmetic
117 on the machine being used. On most machines,
118 relpr is about 1.0e-7 and 1.0d-16 in single and double
119 precision respectively.
120 */
121 template <typename Real>
122 struct lsqr_results
123 {
124 /*! This enumeration describes the reason why the lsqr solver stopped.
125 */
126 enum StopCondition{
127 /*! x = 0 is the exact solution.
128 No iterations were performed. */
129 TrivialSolution=0,
130 /*! The equations A*x = b are probably
131 compatible. Norm(A*x - b) is sufficiently
132 small, given the values of atol and btol. */
133 ExactSolution,
134 /*! damp is zero. The system A*x = b is probably
135 not compatible. A least-squares solution has
136 been obtained that is sufficiently accurate,
137 given the value of atol. */
138 LeastSquaresSolution,
139 /*! damp is nonzero. A damped
140 least-squares solution has been
141 obtained that is sufficiently
142 accurate, given the value of atol.*/
143 DampedLeastSquaresSolution,
144 /*! An estimate of cond(Abar) has exceeded
145 the condition number limit. The system A*x = b appears to
146 be ill-conditioned. */
147 AbarConditionNumberExceeded,
148 /*! The iteration limit was reached */
149 IterationLimitReached
150 };
151 lsqr_results(int _istop, Real _anorm, Real _acond, Real _rnorm,
152 Real _arnorm, Real _xnorm):
153 anorm(_anorm),
154 acond(_acond),
155 rnorm(_rnorm),
156 arnorm(_arnorm),
157 xnorm(_xnorm)
158 {
159 istop = StopCondition(_istop);
160 }
161 /*!
162 The reason why the lsqr solver stopped.
163 \sa StopCondition
164 */
165 StopCondition istop;
166 /*! An estimate of the Frobenius norm of Abar.
167 This is the square-root of the sum of squares
168 of the elements of Abar.
169 If damp is small and if the columns of A
170 have all been scaled to have length 1.0,
171 anorm should increase to roughly sqrt(n).
172 A radically different value for anorm may
173 indicate an error in subroutine aprod (there
174 may be an inconsistency between modes 1 and 2).
175 */
176 Real anorm;
177 /*! An estimate of cond(Abar), the condition
178 number of Abar. A very high value of acond
179 may again indicate a problem in the spmv.
180 */
181 Real acond;
182 /*! An estimate of the final value of
183 norm( Abar(transpose)*rbar ), the norm of
184 the residual for the usual normal equations.
185 This should be small in all cases. (arnorm
186 will often be smaller than the true value
187 computed from the output vector x.)
188 */
189 Real rnorm;
190 /*! An estimate of the final value of
191 norm( Abar(transpose)*rbar ), the norm of
192 the residual for the usual normal equations.
193 This should be small in all cases. (arnorm
194 will often be smaller than the true value
195 computed from the output vector x.)
196 */
197 Real arnorm;
198 /*! An estimate of the norm of the final
199 solution vector x.
200 */
201 Real xnorm;
202 };
203
204
205 /*! \p lsqr : LSQR solver
206 *
207 * Solves:
208 * Ax = b
209 * in the case of square A
210 * or
211 * minimize ||Ax - b||^2 (least squares)
212 * for rectangular A when d == 0
213 * or
214 * minimize ||Ax - b||^2 + d^2 ||x||^2 (dampened least squares)
215 * for rectangular A when d > 0
216 *
217 * using the default convergence criteria.
218 */
219 template <class LinearOperator, class Vector, typename RealType>
220 lsqr_results<RealType> lsqr(LinearOperator& A,
221 Vector& x,
222 Vector& b,
223 const lsqr_parameters<RealType> & p);
224
225 /*! \p lsqr : LSQR solver
226 *
227 * Solves:
228 * Ax = b
229 * for square A when d == 0
230 * or
231 * minimize ||Ax - b||^2 (least squares)
232 * for rectangular A when d == 0
233 * or
234 * minimize ||Ax - b||^2 + d^2 ||x||^2 (dampened least squares)
235 * when d > 0
236 *
237 * \param A matrix of the linear system
238 * \param At transpose of the matrix of the linear system
239 * \param x approximate solution of the linear system
240 * \param b right-hand side of the linear system
241 * \param d the damping factor
242 * \param monitor montiors iteration and determines stopping conditions
243 *
244 * \tparam LinearOperator is a matrix or subclass of \p linear_operator
245 * \tparam Vector vector
246 * \tparam Monitor is a monitor such as \p default_monitor or \p verbose_monitor
247 *
248 * The following code snippet demonstrates how to use \p lsqr to
249 * solve a 10x10 Poisson problem.
250 *
251 * \code
252 * #include <cusp/csr_matrix.h>
253 * #include <cusp/monitor.h>
254 * #include <cusp/krylov/lsqr.h>
255 * #include <cusp/gallery/poisson.h>
256 *
257 * int main(void)
258 * {
259 * // create an empty sparse matrix structure (CSR format)
260 * cusp::csr_matrix<int, float, cusp::device_memory> A;
261 *
262 * // initialize matrix
263 * cusp::gallery::poisson5pt(A, 10, 10);
264 *
265 * // allocate storage for solution (x) and right hand side (b)
266 * cusp::array1d<float, cusp::device_memory> x(A.num_rows, 0);
267 * cusp::array1d<float, cusp::device_memory> b(A.num_rows, 1);
268 *
269 * // set stopping criteria:
270 * // iteration_limit = 100
271 * // relative_tolerance = 1e-6
272 * cusp::verbose_monitor<float> monitor(b, 100, 1e-6);
273 *
274 * // solve the linear system A x = b
275 * // because A is hermitian we can use
276 * // it for its own conjugate transpose
277 * cusp::krylov::lsqr(A, A, x, b, cusp::krylov::lsqr_paramters<float>(), monitor);
278 *
279 * return 0;
280 * }
281 * \endcode
282
283 * \see \p default_monitor
284 * \see \p verbose_monitor
285 *
286 */
287 template <class LinearOperator,
288 class Vector,
289 typename RealType,
290 class Monitor>
291 lsqr_results<RealType> lsqr(LinearOperator& A,
292 Vector& x,
293 Vector& b,
294 const lsqr_parameters<RealType> & p,
295 Monitor& monitor);
296
297 /*! \}
298 */
299
300 } // end namespace krylov
301 } // end namespace cusp
302
303 #include <cusp/krylov/detail/lsqr.inl>
304

  ViewVC Help
Powered by ViewVC 1.1.26