/[escript]/branches/diaplayground/cusplibrary/cusp/krylov/detail/cg_m.inl
ViewVC logotype

Contents of /branches/diaplayground/cusplibrary/cusp/krylov/detail/cg_m.inl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4955 - (show annotations)
Tue May 20 04:33:15 2014 UTC (6 years, 4 months ago) by caltinay
File size: 16166 byte(s)
added pristine copy of cusplibrary (apache license) to be used by ripley.

1 /*
2 * Copyright 2008-2009 NVIDIA Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include <cusp/array1d.h>
19 #include <cusp/blas.h>
20 #include <cusp/multiply.h>
21 #include <cusp/monitor.h>
22
23 #include <thrust/copy.h>
24 #include <thrust/fill.h>
25 #include <thrust/functional.h>
26 #include <thrust/transform.h>
27 #include <thrust/transform_reduce.h>
28 #include <thrust/inner_product.h>
29
30 #include <thrust/iterator/transform_iterator.h>
31
32 /*
33 * The point of these routines is to solve systems of the type
34 *
35 * (A+\sigma Id)x = b
36 *
37 * for a number of different \sigma, iteratively, for sparse A, without
38 * additional matrix-vector multiplication.
39 *
40 * The idea comes from the following paper:
41 * Krylov space solvers for shifted linear systems
42 * B. Jegerlehner
43 * http://arxiv.org/abs/hep-lat/9612014
44 *
45 * This implementation was contributed by Greg van Anders.
46 *
47 */
48
49 namespace cusp
50 {
51 namespace krylov
52 {
53
54 // structs in this namespace do things that are somewhat blas-like, but
55 // are not usual blas operations (e.g. they aren't all linear in all arguments)
56 //
57 // except for KERNEL_VCOPY all of these structs perform operations that
58 // are specific to CG-M
59 namespace detail_m
60 {
61 // computes new \zeta, \beta
62 template <typename ScalarType>
63 struct KERNEL_ZB
64 {
65 ScalarType beta_m1;
66 ScalarType beta_0;
67 ScalarType alpha_0;
68
69 KERNEL_ZB(ScalarType _beta_m1, ScalarType _beta_0, ScalarType _alpha_0)
70 : beta_m1(_beta_m1), beta_0(_beta_0), alpha_0(_alpha_0)
71 {}
72
73 template <typename Tuple>
74 __host__ __device__
75 void operator()(Tuple t)
76 {
77 typedef typename norm_type<ScalarType>::type NormType;
78 // compute \zeta_1^\sigma
79 ScalarType z1, b0, z0=thrust::get<2>(t), zm1 = thrust::get<3>(t),
80 sigma = thrust::get<4>(t);
81 z1 = z0*zm1*beta_m1/(beta_0*alpha_0*(zm1-z0)
82 +beta_m1*zm1*(ScalarType(1)-beta_0*sigma));
83 b0 = beta_0*z1/z0;
84 if ( abs(z1) < NormType(1e-30) )
85 z1 = ScalarType(1e-18);
86 thrust::get<0>(t) = z1;
87 thrust::get<1>(t) = b0;
88 }
89 };
90
91 // computes new alpha
92 template <typename ScalarType>
93 struct KERNEL_A
94 {
95 ScalarType beta_0;
96 ScalarType alpha_0;
97
98 // note: only the ratio alpha_0/beta_0 enters in the computation, it might
99 // be better just to pass this ratio
100 KERNEL_A(ScalarType _beta_0, ScalarType _alpha_0)
101 : beta_0(_beta_0), alpha_0(_alpha_0)
102 {}
103
104 template <typename Tuple>
105 __host__ __device__
106 void operator()(Tuple t)
107 {
108 // compute \alpha_0^\sigma
109 thrust::get<0>(t)=alpha_0/beta_0*thrust::get<2>(t)*thrust::get<3>(t)/
110 thrust::get<1>(t);
111 }
112 };
113
114 // computes new x
115 template <typename ScalarType>
116 struct KERNEL_XP
117 {
118 int N;
119 const ScalarType *alpha_0_s;
120 const ScalarType *beta_0_s;
121 const ScalarType *z_1_s;
122 const ScalarType *r_0;
123
124 KERNEL_XP(int _N, const ScalarType *_alpha_0_s, const ScalarType *_beta_0_s,
125 const ScalarType *_z_1_s, const ScalarType *_r_0) :
126 N(_N), alpha_0_s(_alpha_0_s),
127 beta_0_s(_beta_0_s), z_1_s(_z_1_s), r_0(_r_0) {}
128
129 template <typename Tuple>
130 __host__ __device__
131 void operator()(Tuple t)
132 {
133 // return the transformed result
134 ScalarType x = thrust::get<0>(t);
135 ScalarType p_0 = thrust::get<1>(t);
136 int index = thrust::get<2>(t);
137
138 int N_s = index / N;
139 int N_i = index % N;
140
141 x = x-beta_0_s[N_s]*p_0;
142 p_0 = z_1_s[N_s]*r_0[N_i]+alpha_0_s[N_s]*p_0;
143
144 thrust::get<0>(t) = x;
145 thrust::get<1>(t) = p_0;
146 }
147 };
148
149 // like blas::copy, but copies the same array many times into a larger array
150 template <typename ScalarType>
151 struct KERNEL_VCOPY : thrust::unary_function<int, ScalarType>
152 {
153 int N_t;
154 const ScalarType *source;
155
156 KERNEL_VCOPY(int _N_t, const ScalarType *_source) :
157 N_t(_N_t), source(_source)
158 {}
159
160 __host__ __device__
161 ScalarType operator()(int index)
162 {
163 unsigned int N = index % N_t;
164 return source[N];
165 }
166
167 };
168
169 struct KERNEL_DCOPY
170 {
171 KERNEL_DCOPY() {}
172
173 template <typename Tuple>
174 __host__ __device__
175 void operator()(Tuple t)
176 {
177 thrust::get<2>(t)=thrust::get<1>(t);
178 thrust::get<1>(t)=thrust::get<0>(t);
179 }
180 };
181
182 template <typename T>
183 struct XPAY : public thrust::binary_function<T,T,T>
184 {
185 T alpha;
186
187 XPAY(T _alpha) : alpha(_alpha) {}
188
189 __host__ __device__
190 T operator()(T x, T y)
191 {
192 return x + alpha * y;
193 }
194 };
195
196 } // end namespace detail_m
197
198 // Methods in this namespace are all routines that involve using
199 // thrust::for_each to perform some transformations on arrays of data.
200 //
201 // Except for vectorize_copy, these are specific to CG-M.
202 //
203 // Each has a version that takes Array inputs, and another that takes iterators
204 // as input. The CG-M routine only explicitly refers version with Arrays as
205 // arguments. The Array version calls the iterator version which uses
206 // a struct from cusp::krylov::detail_m.
207 namespace trans_m
208 {
209 // compute \zeta_1^\sigma, \beta_0^\sigma using iterators
210 // uses detail_m::KERNEL_ZB
211 template <typename InputIterator1, typename InputIterator2,
212 typename InputIterator3,
213 typename OutputIterator1, typename OutputIterator2,
214 typename ScalarType>
215 void compute_zb_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
216 InputIterator2 z_m1_s_b, InputIterator3 sig_b,
217 OutputIterator1 z_1_s_b, OutputIterator2 b_0_s_b,
218 ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
219 {
220 size_t N = z_0_s_e - z_0_s_b;
221 thrust::for_each(
222 thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b)),
223 thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b))+N,
224 cusp::krylov::detail_m::KERNEL_ZB<ScalarType>(beta_m1,beta_0,alpha_0)
225 );
226 }
227
228 // compute \zeta_1^\sigma, \beta_0^\sigma using arrays
229 template <typename Array1, typename Array2, typename Array3,
230 typename Array4, typename Array5, typename ScalarType>
231 void compute_zb_m(const Array1& z_0_s, const Array2& z_m1_s,
232 const Array3& sig, Array4& z_1_s, Array5& b_0_s,
233 ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
234 {
235 // sanity checks
236 cusp::blas::detail::assert_same_dimensions(z_0_s,z_m1_s,z_1_s);
237 cusp::blas::detail::assert_same_dimensions(z_1_s,b_0_s,sig);
238
239 // compute
240 cusp::krylov::trans_m::compute_zb_m(z_0_s.begin(),z_0_s.end(),
241 z_m1_s.begin(),sig.begin(),z_1_s.begin(),b_0_s.begin(),
242 beta_m1,beta_0,alpha_0);
243
244 }
245
246 // compute \alpha_0^\sigma, and swap \zeta_i^\sigma using iterators
247 // uses detail_m::KERNEL_A
248 template <typename InputIterator1, typename InputIterator2,
249 typename InputIterator3, typename OutputIterator,
250 typename ScalarType>
251 void compute_a_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
252 InputIterator2 z_1_s_b, InputIterator3 beta_0_s_b,
253 OutputIterator alpha_0_s_b,
254 ScalarType beta_0, ScalarType alpha_0)
255 {
256 size_t N = z_0_s_e - z_0_s_b;
257 thrust::for_each(
258 thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b)),
259 thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b))+N,
260 cusp::krylov::detail_m::KERNEL_A<ScalarType>(beta_0,alpha_0));
261 }
262
263 // compute \alpha_0^\sigma, and swap \zeta_i^\sigma using arrays
264 template <typename Array1, typename Array2, typename Array3,
265 typename Array4, typename ScalarType>
266 void compute_a_m(const Array1& z_0_s, const Array2& z_1_s,
267 const Array3& beta_0_s, Array4& alpha_0_s,
268 ScalarType beta_0, ScalarType alpha_0)
269 {
270 // sanity checks
271 cusp::blas::detail::assert_same_dimensions(z_0_s,z_1_s);
272 cusp::blas::detail::assert_same_dimensions(z_0_s,alpha_0_s,beta_0_s);
273
274 // compute
275 cusp::krylov::trans_m::compute_a_m(z_0_s.begin(), z_0_s.end(),
276 z_1_s.begin(), beta_0_s.begin(), alpha_0_s.begin(),
277 beta_0, alpha_0);
278 }
279
280 // compute x^\sigma, p^\sigma
281 // this is currently done by calling two different kernels... this is likely
282 // not optimal
283 // uses detail_m::KERNEL_XP
284 template <typename Array1, typename Array2, typename Array3,
285 typename Array4, typename Array5, typename Array6>
286 void compute_xp_m(const Array1& alpha_0_s, const Array2& z_1_s,
287 const Array3& beta_0_s, const Array4& r_0,
288 Array5& x_0_s, Array6& p_0_s)
289 {
290 // sanity check
291 cusp::blas::detail::assert_same_dimensions(alpha_0_s,z_1_s,beta_0_s);
292 cusp::blas::detail::assert_same_dimensions(x_0_s,p_0_s);
293 size_t N = r_0.end()-r_0.begin();
294 size_t N_s = alpha_0_s.end()-alpha_0_s.begin();
295 size_t N_t = x_0_s.end()-x_0_s.begin();
296 assert (N_t == N*N_s);
297
298 // counting iterators to pass to thrust::transform
299 thrust::counting_iterator<int> counter(0);
300
301 // get raw pointers for passing to kernels
302 typedef typename Array1::value_type ScalarType;
303 const ScalarType *raw_ptr_alpha_0_s = thrust::raw_pointer_cast(alpha_0_s.data());
304 const ScalarType *raw_ptr_z_1_s = thrust::raw_pointer_cast(z_1_s.data());
305 const ScalarType *raw_ptr_beta_0_s = thrust::raw_pointer_cast(beta_0_s.data());
306 const ScalarType *raw_ptr_r_0 = thrust::raw_pointer_cast(r_0.data());
307
308 // compute new x,p
309 thrust::for_each(
310 thrust::make_zip_iterator(thrust::make_tuple(x_0_s.begin(),p_0_s.begin(),counter)),
311 thrust::make_zip_iterator(thrust::make_tuple(x_0_s.begin(),p_0_s.begin(),counter))+N_t,
312 cusp::krylov::detail_m::KERNEL_XP<ScalarType>(N,raw_ptr_alpha_0_s,raw_ptr_beta_0_s,raw_ptr_z_1_s,raw_ptr_r_0));
313 }
314
315 template <typename Array1, typename Array2, typename Array3>
316 void doublecopy(const Array1& s, Array2& sd, Array3& d)
317 {
318 // sanity check
319 cusp::blas::detail::assert_same_dimensions(s,sd,d);
320 size_t N = s.end()-s.begin();
321
322 // recycle
323 thrust::for_each(
324 thrust::make_zip_iterator(thrust::make_tuple(s.begin(),sd.begin(),d.begin())),
325 thrust::make_zip_iterator(thrust::make_tuple(s.begin(),sd.begin(),d.begin()))+N,
326 cusp::krylov::detail_m::KERNEL_DCOPY());
327 }
328
329 // multiple copy of array to another array
330 // this is just a vectorization of blas::copy
331 // uses detail_m::KERNEL_VCOPY
332 template <typename Array1, typename Array2>
333 void vectorize_copy(const Array1& source, Array2& dest)
334 {
335 // sanity check
336 size_t N = source.end()-source.begin();
337 size_t N_t = dest.end()-dest.begin();
338 assert ( N_t%N == 0 );
339
340 // counting iterators to pass to thrust::transform
341 thrust::counting_iterator<int> counter(0);
342
343 // pointer to data
344 typedef typename Array1::value_type ScalarType;
345 const ScalarType *raw_ptr_source = thrust::raw_pointer_cast(source.data());
346
347 // compute
348 thrust::transform(counter,counter+N_t,dest.begin(),
349 cusp::krylov::detail_m::KERNEL_VCOPY<ScalarType>(N,raw_ptr_source));
350
351 }
352
353 template <typename ForwardIterator1,
354 typename ForwardIterator2,
355 typename ScalarType>
356 void xpay(ForwardIterator1 first1,
357 ForwardIterator1 last1,
358 ForwardIterator2 first2,
359 ScalarType alpha)
360 {
361 thrust::transform(first1, last1, first2, first2, detail_m::XPAY<ScalarType>(alpha));
362 }
363
364 template <typename Array1,
365 typename Array2,
366 typename ScalarType>
367 void xpay(const Array1& x,
368 Array2& y,
369 ScalarType alpha)
370 {
371 cusp::blas::detail::assert_same_dimensions(x, y);
372 cusp::krylov::trans_m::xpay(x.begin(), x.end(), y.begin(), alpha);
373 }
374
375
376 } // end namespace trans_m
377
378
379 // CG-M routine that uses the default monitor to determine completion
380 template <class LinearOperator,
381 class VectorType1,
382 class VectorType2,
383 class VectorType3>
384 void cg_m(LinearOperator& A,
385 VectorType1& x,
386 VectorType2& b,
387 VectorType3& sigma)
388 {
389 typedef typename LinearOperator::value_type ValueType;
390
391 cusp::default_monitor<ValueType> monitor(b);
392
393 return cg_m(A, x, b, sigma, monitor);
394 }
395
396 // CG-M routine that takes a user specified monitor
397 template <class LinearOperator,
398 class VectorType1,
399 class VectorType2,
400 class VectorType3,
401 class Monitor>
402 void cg_m(LinearOperator& A,
403 VectorType1& x,
404 VectorType2& b,
405 VectorType3& sigma,
406 Monitor& monitor)
407 {
408 //
409 // This bit is initialization of the solver.
410 //
411
412 // shorthand for typenames
413 typedef typename LinearOperator::value_type ValueType;
414 typedef typename LinearOperator::memory_space MemorySpace;
415
416 // sanity checking
417 const size_t N = A.num_rows;
418 const size_t N_t = x.end()-x.begin();
419 const size_t test = b.end()-b.begin();
420 const size_t N_s = sigma.end()-sigma.begin();
421
422 assert(A.num_rows == A.num_cols);
423 assert(N_t == N*N_s);
424 assert(N == test);
425
426 //clock_t start = clock();
427
428 // p has data used in computing the soln.
429 cusp::array1d<ValueType,MemorySpace> p_0_s(N_t);
430
431 // stores residuals
432 cusp::array1d<ValueType,MemorySpace> r_0(N);
433 // used in iterates
434 cusp::array1d<ValueType,MemorySpace> p_0(N);
435
436 // stores parameters used in the iteration
437 cusp::array1d<ValueType,MemorySpace> z_m1_s(N_s,ValueType(1));
438 cusp::array1d<ValueType,MemorySpace> z_0_s(N_s,ValueType(1));
439 cusp::array1d<ValueType,MemorySpace> z_1_s(N_s);
440
441 cusp::array1d<ValueType,MemorySpace> alpha_0_s(N_s,ValueType(0));
442 cusp::array1d<ValueType,MemorySpace> beta_0_s(N_s);
443
444 // stores parameters used in the iteration for the undeformed system
445 ValueType beta_m1, beta_0(ValueType(1));
446 ValueType alpha_0(ValueType(0));
447 //ValueType alpha_0_inv;
448
449 // stores the value of the matrix-vector product we have to compute
450 cusp::array1d<ValueType,MemorySpace> Ap(N);
451
452 // stores the value of the inner product (p,Ap)
453 ValueType pAp;
454
455 // store the values of (r_i,r_i) and (r_{i+1},r_{i+1})
456 ValueType rsq_0, rsq_1;
457
458 // set up the initial conditions for the iteration
459 cusp::blas::copy(b,r_0);
460 rsq_1=cusp::blas::dotc(r_0,r_0);
461
462 // set up the intitial guess
463 // cusp::blas::fill(x.begin(),x.end(),ValueType(0));
464 cusp::blas::fill(x,ValueType(0));
465
466 // set up initial value of p_0 and p_0^\sigma
467 cusp::krylov::trans_m::vectorize_copy(b,p_0_s);
468 cusp::blas::copy(b,p_0);
469
470 //
471 // Initialization is done. Solve iteratively
472 //
473 while (!monitor.finished(r_0))
474 {
475 // recycle iterates
476 rsq_0 = rsq_1;
477 beta_m1 = beta_0;
478
479 // compute the matrix-vector product Ap
480 cusp::multiply(A,p_0,Ap);
481
482 // compute the inner product (p,Ap)
483 pAp=cusp::blas::dotc(p_0,Ap);
484
485 // compute \beta_0
486 beta_0 = -rsq_0/pAp;
487
488 // compute the new residual
489 cusp::blas::axpy(Ap,r_0,beta_0);
490
491 // compute \zeta_1^\sigma, \beta_0^\sigma
492 cusp::krylov::trans_m::compute_zb_m(z_0_s, z_m1_s, sigma, z_1_s, beta_0_s,
493 beta_m1, beta_0, alpha_0);
494
495 // compute \alpha_0
496 rsq_1 = cusp::blas::dotc(r_0,r_0);
497 alpha_0 = rsq_1/rsq_0;
498 cusp::krylov::trans_m::xpay(r_0,p_0,alpha_0);
499
500 // calculate \alpha_0^\sigma
501 cusp::krylov::trans_m::compute_a_m(z_0_s, z_1_s, beta_0_s,
502 alpha_0_s, beta_0, alpha_0);
503
504 // compute x_0^\sigma, p_0^\sigma
505 cusp::krylov::trans_m::compute_xp_m(alpha_0_s, z_1_s, beta_0_s, r_0,
506 x, p_0_s);
507
508 // recycle \zeta_i^\sigma
509 cusp::krylov::trans_m::doublecopy(z_1_s,z_0_s,z_m1_s);
510
511 ++monitor;
512
513 }// finished iteration
514
515 } // end cg_m
516
517 } // end namespace krylov
518 } // end namespace cusp
519

  ViewVC Help
Powered by ViewVC 1.1.26