/[escript]/branches/diaplayground/cusplibrary/cusp/krylov/detail/lsqr.inl
ViewVC logotype

Contents of /branches/diaplayground/cusplibrary/cusp/krylov/detail/lsqr.inl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5130 - (show annotations)
Tue Sep 2 06:09:54 2014 UTC (6 years ago) by caltinay
File size: 11019 byte(s)
Implemented transposed SpMV for DIA and CDS on CPU and GPU.
Added LSQR solver, not optimized yet but works in general.

1 #include <fstream>
2 #include <sstream>
3 /*
4 * Copyright 2011 The Regents of the University of California
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 #include <cusp/array1d.h>
19 #include <cusp/blas.h>
20 #include <cusp/multiply.h>
21 #include <cusp/monitor.h>
22 #include <cusp/linear_operator.h>
23 #include <cusp/transpose.h>
24
25 namespace blas = cusp::blas;
26
27 namespace cusp {
28 namespace krylov {
29
30 template <class LinearOperator,
31 class Vector,
32 typename RealType>
33 lsqr_results<RealType> lsqr(LinearOperator& A,
34 Vector& x,
35 Vector& b,
36 const lsqr_parameters<RealType> & p)
37 {
38 typedef typename LinearOperator::value_type ValueType;
39 cusp::default_monitor<ValueType> monitor(b);
40 return cusp::krylov::lsqr(A, x, b, p, monitor);
41 }
42
43 template <class LinearOperator,
44 class Vector,
45 typename RealType,
46 class Monitor>
47 lsqr_results<RealType> lsqr(LinearOperator& A,
48 Vector& x,
49 Vector& b,
50 const lsqr_parameters<RealType> & p,
51 Monitor& monitor)
52 {
53 typedef typename LinearOperator::value_type ValueType;
54 typedef typename LinearOperator::memory_space MemorySpace;
55
56 RealType damp = p.damp;
57 const int n = A.num_cols;
58
59 int istop = 0;
60 int nstop = 0;
61 int nconv;
62 RealType
63 anorm = 0,
64 acond = 0,
65 rnorm = 0,
66 arnorm = 0,
67 xnorm = 0,
68 atol = p.atol,
69 btol = p.btol,
70 conlim = p.conlim;
71 const bool damped = damp > 0;
72 RealType
73 alpha, beta, bnorm,
74 cs, cs1, cs2, ctol,
75 delta, dknorm, dnorm,
76 gamma, gambar, phi, phibar, psi,
77 res2, rho, rhobar, rhbar1,
78 rhs, rtol, sn, sn1, sn2,
79 tau, temp, test1, test2, test3,
80 theta, t1, t2, t3, xnorm1, z, zbar;
81
82 rtol = monitor.relative_tolerance();
83 ctol = (conlim > 0 ? 1.0 / conlim : 0.);
84 dnorm = 0;
85 test2 = 0;
86 res2 = 0;
87 psi = 0;
88 xnorm1 = 0;
89 cs2 = -1;
90 sn2 = 0;
91 z = 0;
92
93 // ------------------------------------------------------------------
94 // Set up the first vectors u and v for the bidiagonalization.
95 // These satisfy beta*u = b, alpha*v = A(transpose)*u.
96 // ------------------------------------------------------------------
97
98 cusp::array1d<ValueType,MemorySpace> w(n);
99 cusp::array1d<ValueType,MemorySpace> v(n, ValueType(0));
100 cusp::array1d<ValueType,MemorySpace> tmp(n);
101 blas::fill(x, ValueType(0));
102
103 alpha = 0;
104 beta = blas::nrm2(b);
105
106 if (beta > 0) {
107 blas::scal(b, ValueType(1.0)/beta);
108 // v = v + At*b;
109 cusp::transposed_multiply(A, b, tmp);
110 blas::axpy(tmp, v, 1);
111 alpha = blas::nrm2(v);
112 }
113
114 if (alpha > 0) {
115 blas::scal(v, ValueType(1.0)/alpha);
116 blas::copy(v, w);
117 }
118 cusp::array1d<ValueType,cusp::host_memory> residue(1);
119
120 arnorm = alpha * beta;
121 rhobar = alpha;
122 phibar = beta;
123 bnorm = beta;
124 rnorm = beta;
125 residue[0] = rnorm;
126
127 // ==================================================================
128 // Main iteration loop.
129 // ==================================================================
130 if (arnorm) {
131 while (!monitor.finished(residue)) {
132 if (istop != 0) {
133 printf("istop = %d\n",istop);
134 break;
135 }
136
137 ++monitor;
138 // -----------------------------------------------------------
139 // Perform the next step of the bidiagonalization to obtain
140 // the next beta, u, alpha, v. These satisfy the relations
141 // beta*u = A*v - alpha*u,
142 // alpha*v = A^T*u - beta*v.
143 // -----------------------------------------------------------
144 blas::scal(b, -alpha);
145 // b = b + A*v;
146 cusp::multiply(A, v, tmp);
147 blas::axpy(tmp, b, 1);
148 beta = blas::nrm2(b);
149
150 // Accumulate anorm = || Bk ||
151 // = sqrt( sum of alpha**2 + beta**2 + damp**2 )
152
153 temp = hypot( alpha, beta );
154 temp = hypot( temp , damp );
155 anorm = hypot( anorm, temp );
156
157 if (beta > 0) {
158 blas::scal(b, ValueType(1.0)/beta);
159 blas::scal (v, -beta);
160 // v = v + At*b;
161 cusp::transposed_multiply(A, b, tmp);
162 blas::axpy(tmp, v, 1);
163 // aprod ( 2, m, n, v, u, UsrWrk );
164 alpha = blas::nrm2(v);
165 if (alpha > 0) {
166 blas::scal(v, ValueType(1.0)/alpha);
167 }
168 }
169
170 // -----------------------------------------------------------
171 // Use a plane rotation to eliminate the damping parameter.
172 // This alters the diagonal (rhobar) of the lower-bidiagonal
173 // matrix.
174 // -----------------------------------------------------------
175 rhbar1 = rhobar;
176 if ( damped ) {
177 rhbar1 = hypot( rhobar, damp );
178 cs1 = rhobar / rhbar1;
179 sn1 = damp / rhbar1;
180 psi = sn1 * phibar;
181 phibar = cs1 * phibar;
182 }
183
184 // -----------------------------------------------------------
185 // Use a plane rotation to eliminate the subdiagonal element
186 // (beta) of the lower-bidiagonal matrix, giving an
187 // upper-bidiagonal matrix.
188 // -----------------------------------------------------------
189 rho = hypot( rhbar1, beta );
190 cs = rhbar1 / rho;
191 sn = beta / rho;
192 theta = sn * alpha;
193 rhobar = - cs * alpha;
194 phi = cs * phibar;
195 phibar = sn * phibar;
196 tau = sn * phi;
197
198 // -----------------------------------------------------------
199 // Update x, w and (perhaps) the standard error estimates.
200 // -----------------------------------------------------------
201 t1 = phi / rho;
202 t2 = - theta / rho;
203 t3 = 1.0 / rho;
204 dknorm = 0;
205
206 blas::axpy(w, x, t1);
207 dknorm = t3 * blas::nrm2(w);
208 blas::axpby(v, w, w, 1, t2);
209 // -----------------------------------------------------------
210 // Monitor the norm of d_k, the update to x.
211 // dknorm = norm( d_k )
212 // dnorm = norm( D_k ), where D_k = (d_1, d_2, ..., d_k )
213 // dxk = norm( phi_k d_k ), where new x = x_k + phi_k d_k.
214 // -----------------------------------------------------------
215 dnorm = hypot( dnorm, dknorm );
216 //dxk = abs( phi * dknorm );
217 //if (dxmax < dxk) {
218 // dxmax = dxk;
219 //}
220
221 // -----------------------------------------------------------
222 // Use a plane rotation on the right to eliminate the
223 // super-diagonal element (theta) of the upper-bidiagonal
224 // matrix. Then use the result to estimate norm(x).
225 // -----------------------------------------------------------
226 delta = sn2 * rho;
227 gambar = - cs2 * rho;
228 rhs = phi - delta * z;
229 zbar = rhs / gambar;
230 xnorm = hypot( xnorm1, zbar );
231 gamma = hypot( gambar, theta );
232 cs2 = gambar / gamma;
233 sn2 = theta / gamma;
234 z = rhs / gamma;
235 xnorm1 = hypot( xnorm1, z );
236
237 // -----------------------------------------------------------
238 // Test for convergence.
239 // First, estimate the norm and condition of the matrix Abar,
240 // and the norms of rbar and Abar^T*rbar.
241 // -----------------------------------------------------------
242 acond = anorm * dnorm;
243 res2 = hypot( res2, psi );
244 rnorm = hypot( res2, phibar );
245 arnorm = alpha * abs( tau );
246
247 // Now use these norms to estimate certain other quantities,
248 // some of which will be small near a solution.
249
250 test1 = rnorm / bnorm;
251 test2 = 0;
252 if (rnorm > 0) {
253 test2 = arnorm / (anorm * rnorm);
254 }
255
256 test3 = ValueType(1.0) / acond;
257 t1 = test1 / (ValueType(1.0) + anorm * xnorm / bnorm);
258 if (btol || atol) {
259 rtol = btol + atol * anorm * xnorm / bnorm;
260 }
261
262 residue[0] = rnorm;
263
264 // The following tests guard against extremely small values of
265 // atol, btol or ctol. (The user may have set any or all of
266 // the parameters atol, btol, conlim to zero.)
267 // The effect is equivalent to the normal tests using
268 // atol = relpr, btol = relpr, conlim = 1/relpr.
269
270 t3 = 1.0 + test3;
271 t2 = 1.0 + test2;
272 t1 = 1.0 + t1;
273 if (monitor.iteration_count() >= monitor.iteration_limit())
274 istop = 5;
275 if (t3 <= 1.0) istop = 4;
276 if (t2 <= 1.0) istop = 2;
277 if (t1 <= 1.0) istop = 1;
278
279 // Allow for tolerances set by the user.
280
281 if (test3 <= ctol) istop = 4;
282 if (test2 <= atol) istop = 2;
283 if (test1 <= rtol) istop = 1;
284
285 if (istop == 0) {
286 nstop = 0;
287 } else {
288 nconv = 1;
289 nstop = nstop + 1;
290 if (nstop < nconv && monitor.iteration_count() < monitor.iteration_limit()) {
291 istop = 0;
292 }
293 }
294 }
295 }
296 // ==================================================================
297 // End of iteration loop.
298 // ==================================================================
299
300 // Decide if istop = 2 or 3.
301 // Print the stopping condition.
302 if (damped && istop == 2){
303 istop = 3;
304 }
305 // Assign output variables from local copies.
306 return lsqr_results<RealType>(istop, anorm, acond, rnorm, test2, xnorm);
307 }
308
309 } // end namespace krylov
310 } // end namespace cusp
311

  ViewVC Help
Powered by ViewVC 1.1.26