/*
 * Copyright 2008-2009 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cusp/array1d.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
#include <cusp/monitor.h>

#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>
#include <thrust/inner_product.h>
#include <thrust/tuple.h>

#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>

#include <cassert>

/*
 * The routines in this file solve systems of the type
 *
 *   (A + \sigma Id) x = b
 *
 * for a number of different shifts \sigma, iteratively, for sparse A,
 * without additional matrix-vector multiplications.
 *
 * The idea comes from arXiv:hep-lat/9612014
 */
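
/*
 * Conventions used below (summarised from the kernels in this file):
 *
 *  - quantities with an _s suffix (z_1_s, beta_0_s, ...) hold one value per
 *    shift \sigma, i.e. they are arrays of length N_s = sigma.size();
 *  - the solution x and the shifted iterates s_0_s store N_s vectors of
 *    length N stacked contiguously, element index = shift*N + row, which is
 *    how KERNEL_XS, KERNEL_X and KERNEL_P decompose their linear index;
 *  - numeric suffixes _m1, _0 and _1 denote the previous, current and next
 *    iterate of a quantity.
 */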

// put everything in cusp
namespace cusp
{

// the solver routines live in cusp::krylov, alongside the other Krylov methods
namespace krylov
{

// structs in this namespace do things that are somewhat blas-like, but
// are not usual blas operations (e.g. they aren't all linear in all arguments)
//
// except for KERNEL_VCOPY, all of these structs perform operations that
// are specific to the multi-shift (BiCGStab-M) iteration
namespace detail_m
{
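
// For reference, the coefficient update implemented by KERNEL_ZB below reads
//
//   \zeta_1^\sigma = \zeta_0^\sigma \zeta_{-1}^\sigma \beta_{-1}
//                    / ( \beta_0 \alpha_0 (\zeta_{-1}^\sigma - \zeta_0^\sigma)
//                        + \beta_{-1} \zeta_{-1}^\sigma (1 - \sigma \beta_0) )
//   \beta_0^\sigma  = \beta_0 \zeta_1^\sigma / \zeta_0^\sigma
//
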
// computes new \zeta, \beta
template <typename ScalarType>
struct KERNEL_ZB
{
    ScalarType beta_m1;
    ScalarType beta_0;
    ScalarType alpha_0;

    KERNEL_ZB(ScalarType _beta_m1, ScalarType _beta_0, ScalarType _alpha_0)
        : beta_m1(_beta_m1), beta_0(_beta_0), alpha_0(_alpha_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        // compute \zeta_1^\sigma and \beta_0^\sigma
        ScalarType z1, b0, z0 = thrust::get<2>(t), zm1 = thrust::get<3>(t),
                   sigma = thrust::get<4>(t);
        z1 = z0*zm1*beta_m1/(beta_0*alpha_0*(zm1-z0)
                             +beta_m1*zm1*(ScalarType(1)-beta_0*sigma));
        b0 = beta_0*z1/z0;
        // avoid \zeta_1^\sigma becoming numerically zero
        if ( abs(z1) < ScalarType(1e-30) )
            z1 = ScalarType(1e-18);
        thrust::get<0>(t) = z1;
        thrust::get<1>(t) = b0;
    }
};

// computes new alpha
template <typename ScalarType>
struct KERNEL_A
{
    ScalarType beta_0;
    ScalarType alpha_0;

    // note: only the ratio alpha_0/beta_0 enters the computation, so it
    // might be better to just pass that ratio
    KERNEL_A(ScalarType _beta_0, ScalarType _alpha_0)
        : beta_0(_beta_0), alpha_0(_alpha_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        // compute \alpha_0^\sigma
        thrust::get<0>(t) = alpha_0/beta_0*thrust::get<2>(t)*thrust::get<3>(t)/
                            thrust::get<1>(t);
    }
};

// computes new w_1
template <typename ScalarType>
struct KERNEL_W
{
    ScalarType beta_0;
    KERNEL_W(const ScalarType _beta_0) : beta_0(_beta_0) {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        thrust::get<0>(t) = thrust::get<1>(t)+beta_0*thrust::get<2>(t);
    }
};

// computes new s_0
template <typename ScalarType>
struct KERNEL_S
{
    ScalarType alpha_1;
    ScalarType chi_0;
    KERNEL_S(ScalarType _alpha_1, ScalarType _chi_0) :
        alpha_1(_alpha_1), chi_0(_chi_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        thrust::get<0>(t) = thrust::get<1>(t)
                            +alpha_1*(thrust::get<0>(t)-chi_0*thrust::get<2>(t));
    }
};

// computes new \chi_0^\sigma and \rho_1^\sigma
template <typename ScalarType>
struct KERNEL_CHIRHO
{
    ScalarType chi_0;
    KERNEL_CHIRHO(ScalarType _chi_0) : chi_0(_chi_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        ScalarType den = ScalarType(1.0)+chi_0*thrust::get<3>(t);
        thrust::get<0>(t) = chi_0/den;
        thrust::get<1>(t) = thrust::get<2>(t)/den;
    }
};

// updates x and the shifted vectors s_0_s in one pass; for each shift \sigma
// and row n it performs
//
//   x   <- x - \beta_0^\sigma s_0 + \chi_0^\sigma \rho_0^\sigma \zeta_1^\sigma w_1
//   s_0 <- \zeta_1^\sigma \rho_1^\sigma r_1
//          + \alpha_1^\sigma ( s_0 - \chi_0^\sigma \rho_0^\sigma / \beta_0^\sigma
//                              ( \zeta_1^\sigma w_1 - \zeta_0^\sigma r_0 ) )
template <typename ScalarType>
struct KERNEL_XS
{
    int N;
    const ScalarType *rp_beta_0_s;
    const ScalarType *rp_chi_0_s;
    const ScalarType *rp_rho_0_s;
    const ScalarType *rp_zeta_0_s;
    const ScalarType *rp_alpha_1_s;
    const ScalarType *rp_rho_1_s;
    const ScalarType *rp_zeta_1_s;
    const ScalarType *rp_r_0;
    const ScalarType *rp_r_1;
    const ScalarType *rp_w_1;

    KERNEL_XS(int _N, const ScalarType *_rp_beta_0_s,
              const ScalarType *_rp_chi_0_s,
              const ScalarType *_rp_rho_0_s,
              const ScalarType *_rp_zeta_0_s,
              const ScalarType *_rp_alpha_1_s,
              const ScalarType *_rp_rho_1_s,
              const ScalarType *_rp_zeta_1_s,
              const ScalarType *_rp_r_0,
              const ScalarType *_rp_r_1,
              const ScalarType *_rp_w_1) :
        N(_N),
        rp_beta_0_s(_rp_beta_0_s),
        rp_chi_0_s(_rp_chi_0_s),
        rp_rho_0_s(_rp_rho_0_s),
        rp_zeta_0_s(_rp_zeta_0_s),
        rp_alpha_1_s(_rp_alpha_1_s),
        rp_rho_1_s(_rp_rho_1_s),
        rp_zeta_1_s(_rp_zeta_1_s),
        rp_r_0(_rp_r_0),
        rp_r_1(_rp_r_1),
        rp_w_1(_rp_w_1)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        int index = thrust::get<2>(t);
        int N_s = index / N;
        int N_n = index % N;

        // load the per-shift coefficients and the per-row value of w_1
        ScalarType z1s = rp_zeta_1_s[N_s];
        ScalarType b0s = rp_beta_0_s[N_s];
        ScalarType c0s = rp_chi_0_s[N_s];
        ScalarType w1  = rp_w_1[N_n];
        ScalarType s_0 = thrust::get<0>(t);

        // update x
        thrust::get<1>(t) = thrust::get<1>(t)-b0s*s_0
                            +c0s*rp_rho_0_s[N_s]*z1s*w1;
        // update s_0
        thrust::get<0>(t) = z1s*rp_rho_1_s[N_s]*rp_r_1[N_n]
                            +rp_alpha_1_s[N_s]*
                            (s_0-c0s*rp_rho_0_s[N_s]
                             /b0s*(z1s*w1-rp_zeta_0_s[N_s]*rp_r_0[N_n]));
    }
};

// computes new x
template <typename ScalarType>
struct KERNEL_X : thrust::binary_function<int, ScalarType, ScalarType>
{
    int N;
    const ScalarType *raw_ptr_beta_0_s;
    const ScalarType *raw_ptr_chi_0_s;
    const ScalarType *raw_ptr_rho_0_s;
    const ScalarType *raw_ptr_zeta_1_s;
    const ScalarType *raw_ptr_w_1;
    const ScalarType *raw_ptr_s_0_s;

    KERNEL_X(int _N, const ScalarType *_raw_ptr_beta_0_s,
             const ScalarType *_raw_ptr_chi_0_s,
             const ScalarType *_raw_ptr_rho_0_s,
             const ScalarType *_raw_ptr_zeta_1_s,
             const ScalarType *_raw_ptr_w_1,
             const ScalarType *_raw_ptr_s_0_s) :
        N(_N),
        raw_ptr_beta_0_s(_raw_ptr_beta_0_s),
        raw_ptr_chi_0_s(_raw_ptr_chi_0_s),
        raw_ptr_rho_0_s(_raw_ptr_rho_0_s),
        raw_ptr_zeta_1_s(_raw_ptr_zeta_1_s),
        raw_ptr_w_1(_raw_ptr_w_1),
        raw_ptr_s_0_s(_raw_ptr_s_0_s)
    {}

    __host__ __device__
    ScalarType operator()(int index, ScalarType val)
    {
        unsigned int N_s = index / N;
        unsigned int N_n = index % N;

        // return the transformed result
        return val-raw_ptr_beta_0_s[N_s]*raw_ptr_s_0_s[index]
               +raw_ptr_chi_0_s[N_s]*raw_ptr_rho_0_s[N_s]
               *raw_ptr_zeta_1_s[N_s]*raw_ptr_w_1[N_n];
    }
};

// computes new p
template <typename ScalarType>
struct KERNEL_P : thrust::binary_function<int, ScalarType, ScalarType>
{
    int N;
    const ScalarType *alpha_0_s;
    const ScalarType *z_1_s;
    const ScalarType *r_0;

    KERNEL_P(int _N, const ScalarType *_alpha_0_s,
             const ScalarType *_z_1_s, const ScalarType *_r_0):
        N(_N), alpha_0_s(_alpha_0_s), z_1_s(_z_1_s), r_0(_r_0)
    {}

    __host__ __device__
    ScalarType operator()(int index, ScalarType val)
    {
        unsigned int N_s = index / N;
        unsigned int N_i = index % N;

        // return the transformed result
        return z_1_s[N_s]*r_0[N_i]+alpha_0_s[N_s]*val;
    }
};

// like blas::copy, but copies the same array many times into a larger array
template <typename ScalarType>
struct KERNEL_VCOPY : thrust::unary_function<int, ScalarType>
{
    int N_t;
    const ScalarType *source;

    KERNEL_VCOPY(int _N_t, const ScalarType *_source) :
        N_t(_N_t), source(_source)
    {}

    __host__ __device__
    ScalarType operator()(int index)
    {
        unsigned int N = index % N_t;
        return source[N];
    }
};

} // end namespace detail_m

// Methods in this namespace are routines that use thrust::for_each (or, for
// vectorize_copy, thrust::transform) to apply transformations to arrays of
// data.
//
// Except for vectorize_copy, these are specific to BiCGStab-M.
//
// Each has a version that takes Array inputs and another that takes
// iterators as input. The BiCGStab-M routine only explicitly calls the
// versions with Array arguments; each Array version checks dimensions and
// forwards to the corresponding iterator version, which uses a struct from
// cusp::krylov::detail_m.
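//
// For example (illustrative, mirroring compute_w_1_m below): the Array form
//
//   cusp::krylov::trans_m::compute_w_1_m(r_0, As, w_1, beta_0);
//
// only checks that r_0, As and w_1 have matching sizes and then forwards to
// the iterator form
//
//   cusp::krylov::trans_m::compute_w_1_m(r_0.begin(), r_0.end(),
//                                        As.begin(), w_1.begin(), beta_0);
//
// which zips the iterators and applies detail_m::KERNEL_W to each element.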
namespace trans_m
{
// compute \zeta_1^\sigma, \beta_0^\sigma using iterators
// uses detail_m::KERNEL_ZB
template <typename InputIterator1, typename InputIterator2,
          typename InputIterator3,
          typename OutputIterator1, typename OutputIterator2,
          typename ScalarType>
void compute_zb_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
                  InputIterator2 z_m1_s_b, InputIterator3 sig_b,
                  OutputIterator1 z_1_s_b, OutputIterator2 b_0_s_b,
                  ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
{
    size_t N = z_0_s_e - z_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b)),
        thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b))+N,
        cusp::krylov::detail_m::KERNEL_ZB<ScalarType>(beta_m1,beta_0,alpha_0)
    );
}

// compute \zeta_1^\sigma, \beta_0^\sigma using arrays
template <typename Array1, typename Array2, typename Array3,
          typename Array4, typename Array5, typename ScalarType>
void compute_zb_m(const Array1& z_0_s, const Array2& z_m1_s,
                  const Array3& sig, Array4& z_1_s, Array5& b_0_s,
                  ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(z_0_s,z_m1_s,z_1_s);
    cusp::blas::detail::assert_same_dimensions(z_1_s,b_0_s,sig);

    // compute
    cusp::krylov::trans_m::compute_zb_m(z_0_s.begin(),z_0_s.end(),
        z_m1_s.begin(),sig.begin(),z_1_s.begin(),b_0_s.begin(),
        beta_m1,beta_0,alpha_0);
}

// compute \alpha_0^\sigma using iterators
// uses detail_m::KERNEL_A
template <typename InputIterator1, typename InputIterator2,
          typename InputIterator3, typename OutputIterator,
          typename ScalarType>
void compute_a_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
                 InputIterator2 z_1_s_b, InputIterator3 beta_0_s_b,
                 OutputIterator alpha_0_s_b,
                 ScalarType beta_0, ScalarType alpha_0)
{
    size_t N = z_0_s_e - z_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b)),
        thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b))+N,
        cusp::krylov::detail_m::KERNEL_A<ScalarType>(beta_0,alpha_0));
}

// compute \alpha_0^\sigma using arrays
template <typename Array1, typename Array2, typename Array3,
          typename Array4, typename ScalarType>
void compute_a_m(const Array1& z_0_s, const Array2& z_1_s,
                 const Array3& beta_0_s, Array4& alpha_0_s,
                 ScalarType beta_0, ScalarType alpha_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(z_0_s,z_1_s);
    cusp::blas::detail::assert_same_dimensions(z_0_s,alpha_0_s,beta_0_s);

    // compute
    cusp::krylov::trans_m::compute_a_m(z_0_s.begin(), z_0_s.end(),
        z_1_s.begin(), beta_0_s.begin(), alpha_0_s.begin(),
        beta_0, alpha_0);
}

// compute x^\sigma, s^\sigma
// uses detail_m::KERNEL_XS
template <typename Array1, typename Array2, typename Array3, typename Array4,
          typename Array5, typename Array6, typename Array7, typename Array8,
          typename Array9, typename Array10, typename Array11, typename Array12>
void compute_xs_m(const Array1& beta_0_s, const Array2& chi_0_s,
                  const Array3& rho_0_s, const Array4& zeta_0_s,
                  const Array5& alpha_1_s, const Array6& rho_1_s,
                  const Array7& zeta_1_s,
                  const Array8& r_0, Array9& r_1,
                  const Array10& w_1, Array11& s_0_s, Array12& x)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(beta_0_s,chi_0_s,rho_0_s);
    cusp::blas::detail::assert_same_dimensions(beta_0_s,zeta_0_s,zeta_1_s);
    cusp::blas::detail::assert_same_dimensions(alpha_1_s,rho_1_s,zeta_1_s);
    cusp::blas::detail::assert_same_dimensions(r_0,r_1,w_1);
    cusp::blas::detail::assert_same_dimensions(s_0_s,x);

    size_t N   = w_1.end()-w_1.begin();
    size_t N_s = beta_0_s.end()-beta_0_s.begin();
    size_t N_t = s_0_s.end()-s_0_s.begin();
    assert (N_t == N*N_s);

    // counting iterator used to generate the linear index passed to the kernel
    thrust::counting_iterator<int> count(0);

    // get raw pointers for passing to kernels
    typedef typename Array1::value_type ScalarType;
    const ScalarType *raw_ptr_beta_0_s  = thrust::raw_pointer_cast(beta_0_s.data());
    const ScalarType *raw_ptr_chi_0_s   = thrust::raw_pointer_cast(chi_0_s.data());
    const ScalarType *raw_ptr_rho_0_s   = thrust::raw_pointer_cast(rho_0_s.data());
    const ScalarType *raw_ptr_zeta_0_s  = thrust::raw_pointer_cast(zeta_0_s.data());
    const ScalarType *raw_ptr_alpha_1_s = thrust::raw_pointer_cast(alpha_1_s.data());
    const ScalarType *raw_ptr_rho_1_s   = thrust::raw_pointer_cast(rho_1_s.data());
    const ScalarType *raw_ptr_zeta_1_s  = thrust::raw_pointer_cast(zeta_1_s.data());
    const ScalarType *raw_ptr_r_0       = thrust::raw_pointer_cast(r_0.data());
    const ScalarType *raw_ptr_r_1       = thrust::raw_pointer_cast(r_1.data());
    const ScalarType *raw_ptr_w_1       = thrust::raw_pointer_cast(w_1.data());

    // update x and s_0_s
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(s_0_s.begin(),x.begin(),count)),
        thrust::make_zip_iterator(thrust::make_tuple(s_0_s.begin(),x.begin(),count))+N_t,
        cusp::krylov::detail_m::KERNEL_XS<ScalarType>(N, raw_ptr_beta_0_s,
            raw_ptr_chi_0_s, raw_ptr_rho_0_s, raw_ptr_zeta_0_s, raw_ptr_alpha_1_s,
            raw_ptr_rho_1_s, raw_ptr_zeta_1_s, raw_ptr_r_0, raw_ptr_r_1, raw_ptr_w_1));
}

// compute w_1 using iterators
// uses detail_m::KERNEL_W
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_w_1_m(InputIterator1 r_0_b, InputIterator1 r_0_e,
                   InputIterator2 As_b, OutputIterator w_1_b,
                   ScalarType beta_0)
{
    size_t N = r_0_e-r_0_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(w_1_b,r_0_b,As_b)),
        thrust::make_zip_iterator(thrust::make_tuple(w_1_b,r_0_b,As_b))+N,
        cusp::krylov::detail_m::KERNEL_W<ScalarType>(beta_0));
}

// compute w_1 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_w_1_m(const Array1& r_0, const Array2& As, Array3& w_1,
                   ScalarType beta_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(r_0,As,w_1);

    // compute
    cusp::krylov::trans_m::compute_w_1_m(r_0.begin(),r_0.end(),
        As.begin(),w_1.begin(),beta_0);
}

// compute the new residual r_1 using iterators
// reuses detail_m::KERNEL_W with coefficient -chi_0
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_r_1_m(InputIterator1 w_1_b, InputIterator1 w_1_e,
                   InputIterator2 Aw_b, OutputIterator r_1_b,
                   ScalarType chi_0)
{
    size_t N = w_1_e-w_1_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(r_1_b,w_1_b,Aw_b)),
        thrust::make_zip_iterator(thrust::make_tuple(r_1_b,w_1_b,Aw_b))+N,
        cusp::krylov::detail_m::KERNEL_W<ScalarType>(-chi_0));
}

// compute the new residual r_1 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_r_1_m(const Array1& w_1, const Array2& Aw, Array3& r_1,
                   ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(w_1,Aw,r_1);

    // compute
    cusp::krylov::trans_m::compute_r_1_m(w_1.begin(),w_1.end(),
        Aw.begin(),r_1.begin(),chi_0);
}

// compute s_0 using iterators
// uses detail_m::KERNEL_S
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_s_0_m(InputIterator1 r_1_b, InputIterator1 r_1_e,
                   InputIterator2 As_b, OutputIterator s_0_b,
                   ScalarType alpha_1, ScalarType chi_0)
{
    size_t N = r_1_e-r_1_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(s_0_b,r_1_b,As_b)),
        thrust::make_zip_iterator(thrust::make_tuple(s_0_b,r_1_b,As_b))+N,
        cusp::krylov::detail_m::KERNEL_S<ScalarType>(alpha_1,chi_0));
}

// compute s_0 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_s_0_m(const Array1& r_1, const Array2& As, Array3& s_0,
                   ScalarType alpha_1, ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(r_1,As,s_0);

    // compute
    cusp::krylov::trans_m::compute_s_0_m(r_1.begin(),r_1.end(),
        As.begin(),s_0.begin(),alpha_1,chi_0);
}

// compute \chi_0^\sigma and \rho_1^\sigma using iterators
// uses detail_m::KERNEL_CHIRHO
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator1, typename OutputIterator2,
          typename ScalarType>
void compute_chirho_m(InputIterator1 rho_0_s_b, InputIterator1 rho_0_s_e,
                      InputIterator2 sigma_b,
                      OutputIterator1 chi_0_s_b, OutputIterator2 rho_1_s_b,
                      ScalarType chi_0)
{
    size_t N = rho_0_s_e-rho_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(chi_0_s_b,rho_1_s_b,rho_0_s_b,sigma_b)),
        thrust::make_zip_iterator(thrust::make_tuple(chi_0_s_b,rho_1_s_b,rho_0_s_b,sigma_b))+N,
        cusp::krylov::detail_m::KERNEL_CHIRHO<ScalarType>(chi_0));
}

// compute \chi_0^\sigma and \rho_1^\sigma using arrays
template <typename Array1, typename Array2, typename Array3, typename Array4,
          typename ScalarType>
void compute_chirho_m(const Array1& rho_0_s, const Array2& sigma,
                      Array3& chi_0_s, Array4& rho_1_s, ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(sigma,rho_0_s,rho_1_s);
    cusp::blas::detail::assert_same_dimensions(chi_0_s,rho_1_s);

    // compute
    cusp::krylov::trans_m::compute_chirho_m(rho_0_s.begin(),rho_0_s.end(),
        sigma.begin(),chi_0_s.begin(),rho_1_s.begin(),chi_0);
}

// copies one array repeatedly into a larger array
// this is just a vectorized blas::copy
// uses detail_m::KERNEL_VCOPY
// (bicgstab_m below uses it as vectorize_copy(b, s_0_s) to fill s_0_s with
// N_s consecutive copies of b)
template <typename Array1, typename Array2>
void vectorize_copy(const Array1& source, Array2& dest)
{
    // sanity check
    size_t N   = source.end()-source.begin();
    size_t N_t = dest.end()-dest.begin();
    assert ( N_t%N == 0 );

    // counting iterator to pass to thrust::transform
    thrust::counting_iterator<int> counter(0);

    // pointer to data
    typedef typename Array1::value_type ScalarType;
    const ScalarType *raw_ptr_source = thrust::raw_pointer_cast(source.data());

    // compute
    thrust::transform(counter,counter+N_t,dest.begin(),
        cusp::krylov::detail_m::KERNEL_VCOPY<ScalarType>(N,raw_ptr_source));
}

} // end namespace trans_m

// BiCGStab-M routine that uses the default monitor to determine completion
template <class LinearOperator,
          class VectorType1, class VectorType2, class VectorType3>
void bicgstab_m(LinearOperator& A,
                VectorType1& x, VectorType2& b, VectorType3& sigma)
{
    typedef typename LinearOperator::value_type ValueType;

    cusp::default_monitor<ValueType> monitor(b);

    return bicgstab_m(A, x, b, sigma, monitor);
}
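
// Illustrative usage sketch (assuming a square cusp matrix A, e.g. a
// cusp::csr_matrix, whose value_type is float): for N rows, N_s shifts and a
// single right-hand side b, x must hold all shifted solutions, i.e. N*N_s
// entries stored shift after shift, as required by the asserts below.
//
//   cusp::array1d<float, cusp::device_memory> b(N), sigma(N_s), x(N*N_s);
//   cusp::default_monitor<float> monitor(b);
//   cusp::krylov::bicgstab_m(A, x, b, sigma, monitor);
//
// or call cusp::krylov::bicgstab_m(A, x, b, sigma); to use the default
// monitor constructed from b, as in the overload above.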

// BiCGStab-M routine that takes a user specified monitor
template <class LinearOperator,
          class VectorType1, class VectorType2, class VectorType3,
          class Monitor>
void bicgstab_m(LinearOperator& A,
                VectorType1& x, VectorType2& b, VectorType3& sigma,
                Monitor& monitor)
{
    CUSP_PROFILE_SCOPED();

    //
    // This bit is initialization of the solver.
    //

    // shorthand for typenames
    typedef typename LinearOperator::value_type ValueType;
    typedef typename LinearOperator::memory_space MemorySpace;

    // sanity checking
    const size_t N = A.num_rows;
    const size_t N_t = x.end()-x.begin();
    const size_t test = b.end()-b.begin();
    const size_t N_s = sigma.end()-sigma.begin();

    assert(A.num_rows == A.num_cols);
    assert(N_t == N*N_s);
    assert(N == test);
    //clock_t start = clock();

    // w has data used in computing the solution
    cusp::array1d<ValueType,MemorySpace> w_1(N);
    cusp::array1d<ValueType,MemorySpace> w_0(N);

    // stores residuals
    cusp::array1d<ValueType,MemorySpace> r_0(N);
    cusp::array1d<ValueType,MemorySpace> r_1(N);

    // vectors used in the iteration
    cusp::array1d<ValueType,MemorySpace> s_0(N);
    cusp::array1d<ValueType,MemorySpace> s_0_s(N_t);

    // stores parameters used in the iteration
    cusp::array1d<ValueType,MemorySpace> z_m1_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> z_0_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> z_1_s(N_s);

    cusp::array1d<ValueType,MemorySpace> alpha_0_s(N_s,ValueType(0));
    cusp::array1d<ValueType,MemorySpace> beta_0_s(N_s);

    cusp::array1d<ValueType,MemorySpace> rho_0_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> rho_1_s(N_s);
    cusp::array1d<ValueType,MemorySpace> chi_0_s(N_s);

    // stores parameters used in the iteration for the unshifted system
    ValueType beta_m1, beta_0(ValueType(1));
    ValueType alpha_0(ValueType(0));

    ValueType delta_0, delta_1;
    ValueType phi_0;
    ValueType chi_0;

    // stores the matrix-vector products we have to compute
    cusp::array1d<ValueType,MemorySpace> As(N);
    cusp::array1d<ValueType,MemorySpace> Aw(N);

    // set up the initial conditions for the iteration
    cusp::blas::copy(b,r_0);
    cusp::blas::copy(b,w_1);
    cusp::blas::copy(w_1,w_0);

    // set up the initial guess
    cusp::blas::fill(x,ValueType(0));

    // set up initial value of s_0 and s_0^\sigma
    cusp::krylov::trans_m::vectorize_copy(b,s_0_s);
    cusp::blas::copy(b,s_0);
    cusp::multiply(A,s_0,As);

    delta_1 = cusp::blas::dotc(w_0,r_0);
    phi_0 = cusp::blas::dotc(w_0,As)/delta_1;

    //
    // Initialization is done. Solve iteratively.
    //
    while (!monitor.finished(r_0))
    {
        // recycle iterates
        beta_m1 = beta_0;
        beta_0 = ValueType(-1.0)/phi_0;
        delta_0 = delta_1;

        // compute \zeta_1^\sigma, \beta_0^\sigma
        cusp::krylov::trans_m::compute_zb_m(z_0_s, z_m1_s, sigma, z_1_s, beta_0_s,
                                            beta_m1, beta_0, alpha_0);

        // call w_1 kernel
        cusp::krylov::trans_m::compute_w_1_m(r_0, As, w_1, beta_0);

        // compute the matrix-vector product Aw
        cusp::multiply(A,w_1,Aw);

        // compute chi_0
        chi_0 = cusp::blas::dotc(Aw,w_1)/cusp::blas::dotc(Aw,Aw);

        // compute new residual
        cusp::krylov::trans_m::compute_r_1_m(w_1,Aw,r_1,chi_0);

        // compute the new delta
        delta_1 = cusp::blas::dotc(w_0,r_1);

        // compute new alpha
        alpha_0 = -beta_0*delta_1/delta_0/chi_0;

        // compute s_0
        cusp::krylov::trans_m::compute_s_0_m(r_1,As,s_0,alpha_0,chi_0);

        // compute As
        cusp::multiply(A,s_0,As);

        // compute new phi
        phi_0 = cusp::blas::dotc(w_0,As)/delta_1;

        // compute shifted rho, chi
        cusp::krylov::trans_m::compute_chirho_m(rho_0_s,sigma,chi_0_s,rho_1_s,
                                                chi_0);

        // calculate \alpha_0^\sigma
        cusp::krylov::trans_m::compute_a_m(z_0_s, z_1_s, beta_0_s,
                                           alpha_0_s, beta_0, alpha_0);

        // compute the new solution and s_0^\sigma
        cusp::krylov::trans_m::compute_xs_m(beta_0_s, chi_0_s, rho_0_s, z_0_s,
                                            alpha_0_s, rho_1_s, z_1_s, r_0, r_1, w_1, s_0_s, x);

        // recycle r_i
        cusp::blas::copy(r_1,r_0);

        // recycle \zeta_i^\sigma
        cusp::blas::copy(z_0_s,z_m1_s);
        cusp::blas::copy(z_1_s,z_0_s);

        // recycle \rho_i^\sigma
        cusp::blas::copy(rho_1_s,rho_0_s);

        ++monitor;
    } // finished iteration

} // end bicgstab_m

} // end namespace krylov
} // end namespace cusp
