/branches/diaplayground/cusplibrary/cusp/krylov/detail/bicgstab_m.inl

Revision 4955 - Tue May 20 04:33:15 2014 UTC - caltinay
File size: 24493 byte(s)
added pristine copy of cusplibrary (apache license) to be used by ripley.
/*
 * Copyright 2008-2009 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cusp/array1d.h>
#include <cusp/blas.h>
#include <cusp/multiply.h>
#include <cusp/monitor.h>

#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>
#include <thrust/inner_product.h>

#include <thrust/iterator/transform_iterator.h>


/*
 * The point of these routines is to solve systems of the type
 *
 * (A+\sigma Id)x = b
 *
 * for a number of different \sigma, iteratively, for sparse A, without
 * additional matrix-vector multiplication.
 *
 * The idea comes from arXiv:hep-lat/9612014
 *
 */
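
/*
 * Minimal usage sketch (illustrative only; the matrix type, sizes and shift
 * values below are assumptions, not part of this file). The solution vector
 * x must provide one contiguous block of length A.num_rows per shift:
 *
 *   #include <cusp/csr_matrix.h>
 *   #include <cusp/array1d.h>
 *   #include <cusp/krylov/bicgstab_m.h>
 *
 *   // A: N x N sparse matrix, b: right-hand side of length N
 *   cusp::csr_matrix<int, float, cusp::device_memory> A;  // assembled elsewhere
 *   cusp::array1d<float, cusp::device_memory> b(A.num_rows, 1.0f);
 *
 *   // shifts \sigma_k; one system (A + \sigma_k I) x_k = b is solved per entry
 *   cusp::array1d<float, cusp::device_memory> sigma(3);
 *   sigma[0] = 0.1f; sigma[1] = 0.5f; sigma[2] = 1.0f;
 *
 *   // x holds all shifted solutions back to back: x[k*N .. (k+1)*N-1] = x_k
 *   cusp::array1d<float, cusp::device_memory> x(A.num_rows * sigma.size());
 *
 *   cusp::krylov::bicgstab_m(A, x, b, sigma);
 */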

// put everything in cusp
namespace cusp
{

// this namespace contains things that are like cusp::krylov
// different name chosen to avoid the possibility of collisions
namespace krylov
{

// structs in this namespace do things that are somewhat blas-like, but
// are not usual blas operations (e.g. they aren't all linear in all arguments)
//
// except for KERNEL_VCOPY all of these structs perform operations that
// are specific to the multi-shift (CG-M/BiCGStab-M) solvers
namespace detail_m
{
// computes new \zeta, \beta
template <typename ScalarType>
struct KERNEL_ZB
{
    ScalarType beta_m1;
    ScalarType beta_0;
    ScalarType alpha_0;

    KERNEL_ZB(ScalarType _beta_m1, ScalarType _beta_0, ScalarType _alpha_0)
        : beta_m1(_beta_m1), beta_0(_beta_0), alpha_0(_alpha_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        // compute \zeta_1^\sigma
        ScalarType z1, b0, z0 = thrust::get<2>(t), zm1 = thrust::get<3>(t),
                   sigma = thrust::get<4>(t);
        z1 = z0*zm1*beta_m1/(beta_0*alpha_0*(zm1-z0)
             +beta_m1*zm1*(ScalarType(1)-beta_0*sigma));
        b0 = beta_0*z1/z0;
        if ( abs(z1) < ScalarType(1e-30) )
            z1 = ScalarType(1e-18);
        thrust::get<0>(t) = z1;
        thrust::get<1>(t) = b0;
    }
};
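
/*
 * For reference, the update applied by KERNEL_ZB above (this merely restates
 * the code; see arXiv:hep-lat/9612014 for the derivation):
 *
 *   \zeta_1^\sigma = \zeta_0^\sigma \zeta_{-1}^\sigma \beta_{-1} /
 *       ( \beta_0 \alpha_0 (\zeta_{-1}^\sigma - \zeta_0^\sigma)
 *         + \beta_{-1} \zeta_{-1}^\sigma (1 - \beta_0 \sigma) )
 *
 *   \beta_0^\sigma = \beta_0 \zeta_1^\sigma / \zeta_0^\sigma
 *
 * where \zeta_1^\sigma is clamped away from zero so that later divisions by
 * it remain well defined.
 */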

// computes new alpha
template <typename ScalarType>
struct KERNEL_A
{
    ScalarType beta_0;
    ScalarType alpha_0;

    // note: only the ratio alpha_0/beta_0 enters in the computation, it might
    // be better just to pass this ratio
    KERNEL_A(ScalarType _beta_0, ScalarType _alpha_0)
        : beta_0(_beta_0), alpha_0(_alpha_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        // compute \alpha_0^\sigma
        thrust::get<0>(t)=alpha_0/beta_0*thrust::get<2>(t)*thrust::get<3>(t)/
                          thrust::get<1>(t);
    }
};

// computes new w_1
template <typename ScalarType>
struct KERNEL_W
{
    ScalarType beta_0;
    KERNEL_W(const ScalarType _beta_0) : beta_0(_beta_0) {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        thrust::get<0>(t)=thrust::get<1>(t)+beta_0*thrust::get<2>(t);
    }
};

// computes new s_0
template <typename ScalarType>
struct KERNEL_S
{
    ScalarType alpha_1;
    ScalarType chi_0;
    KERNEL_S(ScalarType _alpha_1, ScalarType _chi_0) :
        alpha_1(_alpha_1), chi_0(_chi_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        thrust::get<0>(t)=thrust::get<1>(t)
            +alpha_1*(thrust::get<0>(t)-chi_0*thrust::get<2>(t));
    }
};

// computes new \chi_0^\sigma, \rho_1^\sigma
template <typename ScalarType>
struct KERNEL_CHIRHO
{
    ScalarType chi_0;
    KERNEL_CHIRHO(ScalarType _chi_0) : chi_0(_chi_0)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        ScalarType den = ScalarType(1.0)+chi_0*thrust::get<3>(t);
        thrust::get<0>(t)=chi_0/den;
        thrust::get<1>(t)=thrust::get<2>(t)/den;
    }
};

// computes new x and s_0^\sigma
template <typename ScalarType>
struct KERNEL_XS
{
    int N;
    const ScalarType *rp_beta_0_s;
    const ScalarType *rp_chi_0_s;
    const ScalarType *rp_rho_0_s;
    const ScalarType *rp_zeta_0_s;
    const ScalarType *rp_alpha_1_s;
    const ScalarType *rp_rho_1_s;
    const ScalarType *rp_zeta_1_s;
    const ScalarType *rp_r_0;
    const ScalarType *rp_r_1;
    const ScalarType *rp_w_1;

    KERNEL_XS(int _N, const ScalarType *_rp_beta_0_s,
              const ScalarType *_rp_chi_0_s,
              const ScalarType *_rp_rho_0_s,
              const ScalarType *_rp_zeta_0_s,
              const ScalarType *_rp_alpha_1_s,
              const ScalarType *_rp_rho_1_s,
              const ScalarType *_rp_zeta_1_s,
              const ScalarType *_rp_r_0,
              const ScalarType *_rp_r_1,
              const ScalarType *_rp_w_1) :
        N(_N),
        rp_beta_0_s(_rp_beta_0_s),
        rp_chi_0_s(_rp_chi_0_s),
        rp_rho_0_s(_rp_rho_0_s),
        rp_zeta_0_s(_rp_zeta_0_s),
        rp_alpha_1_s(_rp_alpha_1_s),
        rp_rho_1_s(_rp_rho_1_s),
        rp_zeta_1_s(_rp_zeta_1_s),
        rp_r_0(_rp_r_0),
        rp_r_1(_rp_r_1),
        rp_w_1(_rp_w_1)
    {}

    template <typename Tuple>
    __host__ __device__
    void operator()(Tuple t)
    {
        int index = thrust::get<2>(t);
        int N_s = index / N;
        int N_n = index % N;

        // write the transformed results
        ScalarType z1s = rp_zeta_1_s[N_s];
        ScalarType b0s = rp_beta_0_s[N_s];
        ScalarType c0s = rp_chi_0_s[N_s];
        ScalarType w1 = rp_w_1[N_n];
        ScalarType s_0 = thrust::get<0>(t);

        thrust::get<1>(t) = thrust::get<1>(t)-b0s*s_0
            +c0s*rp_rho_0_s[N_s]*z1s*w1;
        thrust::get<0>(t) = z1s*rp_rho_1_s[N_s]*rp_r_1[N_n]
            +rp_alpha_1_s[N_s]*
            (s_0-c0s*rp_rho_0_s[N_s]
             /b0s*(z1s*w1-rp_zeta_0_s[N_s]*rp_r_0[N_n]));
    }
};
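
/*
 * Note on indexing in KERNEL_XS (and in KERNEL_X/KERNEL_P below): the
 * flattened arrays of shifted quantities (x, s_0_s, ...) store one
 * contiguous block of length N per shift, so for a flat index the shift
 * number is index / N and the component within that shift's block is
 * index % N.
 */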

// computes new x
template <typename ScalarType>
struct KERNEL_X : thrust::binary_function<int, ScalarType, ScalarType>
{
    int N;
    const ScalarType *raw_ptr_beta_0_s;
    const ScalarType *raw_ptr_chi_0_s;
    const ScalarType *raw_ptr_rho_0_s;
    const ScalarType *raw_ptr_zeta_1_s;
    const ScalarType *raw_ptr_w_1;
    const ScalarType *raw_ptr_s_0_s;

    KERNEL_X(int _N, const ScalarType *_raw_ptr_beta_0_s,
             const ScalarType *_raw_ptr_chi_0_s,
             const ScalarType *_raw_ptr_rho_0_s,
             const ScalarType *_raw_ptr_zeta_1_s,
             const ScalarType *_raw_ptr_w_1,
             const ScalarType *_raw_ptr_s_0_s) :
        N(_N),
        raw_ptr_beta_0_s(_raw_ptr_beta_0_s),
        raw_ptr_chi_0_s(_raw_ptr_chi_0_s),
        raw_ptr_rho_0_s(_raw_ptr_rho_0_s),
        raw_ptr_zeta_1_s(_raw_ptr_zeta_1_s),
        raw_ptr_w_1(_raw_ptr_w_1),
        raw_ptr_s_0_s(_raw_ptr_s_0_s)
    {}

    __host__ __device__
    ScalarType operator()(int index, ScalarType val)
    {
        unsigned int N_s = index / N;
        unsigned int N_n = index % N;

        // return the transformed result
        return val-raw_ptr_beta_0_s[N_s]*raw_ptr_s_0_s[index]
               +raw_ptr_chi_0_s[N_s]*raw_ptr_rho_0_s[N_s]
               *raw_ptr_zeta_1_s[N_s]*raw_ptr_w_1[N_n];
    }
};

// computes new p
template <typename ScalarType>
struct KERNEL_P : thrust::binary_function<int, ScalarType, ScalarType>
{
    int N;
    const ScalarType *alpha_0_s;
    const ScalarType *z_1_s;
    const ScalarType *r_0;

    KERNEL_P(int _N, const ScalarType *_alpha_0_s,
             const ScalarType *_z_1_s, const ScalarType *_r_0):
        N(_N), alpha_0_s(_alpha_0_s), z_1_s(_z_1_s), r_0(_r_0)
    {}

    __host__ __device__
    ScalarType operator()(int index, ScalarType val)
    {
        unsigned int N_s = index / N;
        unsigned int N_i = index % N;

        // return the transformed result
        return z_1_s[N_s]*r_0[N_i]+alpha_0_s[N_s]*val;
    }
};

// like blas::copy, but copies the same array many times into a larger array
template <typename ScalarType>
struct KERNEL_VCOPY : thrust::unary_function<int, ScalarType>
{
    int N_t;
    const ScalarType *source;

    KERNEL_VCOPY(int _N_t, const ScalarType *_source) :
        N_t(_N_t), source(_source)
    {}

    __host__ __device__
    ScalarType operator()(int index)
    {
        unsigned int N = index % N_t;
        return source[N];
    }
};

} // end namespace detail_m

// Methods in this namespace are all routines that involve using
// thrust::for_each to perform some transformations on arrays of data.
//
// Except for vectorize_copy, these are specific to the multi-shift
// (CG-M/BiCGStab-M) solvers.
//
// Each has a version that takes Array inputs, and another that takes
// iterators as input. The solver routine only explicitly refers to the
// versions with Arrays as arguments. The Array version calls the iterator
// version, which uses a struct from cusp::krylov::detail_m.
namespace trans_m
{
// compute \zeta_1^\sigma, \beta_0^\sigma using iterators
// uses detail_m::KERNEL_ZB
template <typename InputIterator1, typename InputIterator2,
          typename InputIterator3,
          typename OutputIterator1, typename OutputIterator2,
          typename ScalarType>
void compute_zb_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
                  InputIterator2 z_m1_s_b, InputIterator3 sig_b,
                  OutputIterator1 z_1_s_b, OutputIterator2 b_0_s_b,
                  ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
{
    size_t N = z_0_s_e - z_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b)),
        thrust::make_zip_iterator(thrust::make_tuple(z_1_s_b,b_0_s_b,z_0_s_b,z_m1_s_b,sig_b))+N,
        cusp::krylov::detail_m::KERNEL_ZB<ScalarType>(beta_m1,beta_0,alpha_0)
    );
}

// compute \zeta_1^\sigma, \beta_0^\sigma using arrays
template <typename Array1, typename Array2, typename Array3,
          typename Array4, typename Array5, typename ScalarType>
void compute_zb_m(const Array1& z_0_s, const Array2& z_m1_s,
                  const Array3& sig, Array4& z_1_s, Array5& b_0_s,
                  ScalarType beta_m1, ScalarType beta_0, ScalarType alpha_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(z_0_s,z_m1_s,z_1_s);
    cusp::blas::detail::assert_same_dimensions(z_1_s,b_0_s,sig);

    // compute
    cusp::krylov::trans_m::compute_zb_m(z_0_s.begin(),z_0_s.end(),
        z_m1_s.begin(),sig.begin(),z_1_s.begin(),b_0_s.begin(),
        beta_m1,beta_0,alpha_0);
}

// compute \alpha_0^\sigma using iterators
// uses detail_m::KERNEL_A
template <typename InputIterator1, typename InputIterator2,
          typename InputIterator3, typename OutputIterator,
          typename ScalarType>
void compute_a_m(InputIterator1 z_0_s_b, InputIterator1 z_0_s_e,
                 InputIterator2 z_1_s_b, InputIterator3 beta_0_s_b,
                 OutputIterator alpha_0_s_b,
                 ScalarType beta_0, ScalarType alpha_0)
{
    size_t N = z_0_s_e - z_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b)),
        thrust::make_zip_iterator(thrust::make_tuple(alpha_0_s_b,z_0_s_b,z_1_s_b,beta_0_s_b))+N,
        cusp::krylov::detail_m::KERNEL_A<ScalarType>(beta_0,alpha_0));
}

// compute \alpha_0^\sigma using arrays
template <typename Array1, typename Array2, typename Array3,
          typename Array4, typename ScalarType>
void compute_a_m(const Array1& z_0_s, const Array2& z_1_s,
                 const Array3& beta_0_s, Array4& alpha_0_s,
                 ScalarType beta_0, ScalarType alpha_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(z_0_s,z_1_s);
    cusp::blas::detail::assert_same_dimensions(z_0_s,alpha_0_s,beta_0_s);

    // compute
    cusp::krylov::trans_m::compute_a_m(z_0_s.begin(), z_0_s.end(),
        z_1_s.begin(), beta_0_s.begin(), alpha_0_s.begin(),
        beta_0, alpha_0);
}

// compute x^\sigma, s^\sigma
// uses detail_m::KERNEL_XS
template <typename Array1, typename Array2, typename Array3, typename Array4,
          typename Array5, typename Array6, typename Array7, typename Array8,
          typename Array9, typename Array10, typename Array11, typename Array12>
void compute_xs_m(const Array1& beta_0_s, const Array2& chi_0_s,
                  const Array3& rho_0_s, const Array4& zeta_0_s,
                  const Array5& alpha_1_s, const Array6& rho_1_s,
                  const Array7& zeta_1_s,
                  const Array8& r_0, Array9& r_1,
                  const Array10& w_1, Array11& s_0_s, Array12& x)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(beta_0_s,chi_0_s,rho_0_s);
    cusp::blas::detail::assert_same_dimensions(beta_0_s,zeta_0_s,zeta_1_s);
    cusp::blas::detail::assert_same_dimensions(alpha_1_s,rho_1_s,zeta_1_s);
    cusp::blas::detail::assert_same_dimensions(r_0,r_1,w_1);
    cusp::blas::detail::assert_same_dimensions(s_0_s,x);

    size_t N = w_1.end()-w_1.begin();
    size_t N_s = beta_0_s.end()-beta_0_s.begin();
    size_t N_t = s_0_s.end()-s_0_s.begin();
    assert (N_t == N*N_s);

    // counting iterator to pass to thrust::for_each
    thrust::counting_iterator<int> count(0);

    // get raw pointers for passing to kernels
    typedef typename Array1::value_type ScalarType;
    const ScalarType *raw_ptr_beta_0_s  = thrust::raw_pointer_cast(beta_0_s.data());
    const ScalarType *raw_ptr_chi_0_s   = thrust::raw_pointer_cast(chi_0_s.data());
    const ScalarType *raw_ptr_rho_0_s   = thrust::raw_pointer_cast(rho_0_s.data());
    const ScalarType *raw_ptr_zeta_0_s  = thrust::raw_pointer_cast(zeta_0_s.data());
    const ScalarType *raw_ptr_alpha_1_s = thrust::raw_pointer_cast(alpha_1_s.data());
    const ScalarType *raw_ptr_rho_1_s   = thrust::raw_pointer_cast(rho_1_s.data());
    const ScalarType *raw_ptr_zeta_1_s  = thrust::raw_pointer_cast(zeta_1_s.data());
    const ScalarType *raw_ptr_r_0 = thrust::raw_pointer_cast(r_0.data());
    const ScalarType *raw_ptr_r_1 = thrust::raw_pointer_cast(r_1.data());
    const ScalarType *raw_ptr_w_1 = thrust::raw_pointer_cast(w_1.data());

    // compute x and s_0^\sigma
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(s_0_s.begin(),x.begin(),count)),
        thrust::make_zip_iterator(thrust::make_tuple(s_0_s.begin(),x.begin(),count))+N_t,
        cusp::krylov::detail_m::KERNEL_XS<ScalarType>(N, raw_ptr_beta_0_s,
            raw_ptr_chi_0_s, raw_ptr_rho_0_s, raw_ptr_zeta_0_s, raw_ptr_alpha_1_s,
            raw_ptr_rho_1_s, raw_ptr_zeta_1_s, raw_ptr_r_0, raw_ptr_r_1, raw_ptr_w_1));
}

// compute w_1 using iterators
// uses detail_m::KERNEL_W
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_w_1_m(InputIterator1 r_0_b, InputIterator1 r_0_e,
                   InputIterator2 As_b, OutputIterator w_1_b,
                   ScalarType beta_0)
{
    size_t N = r_0_e-r_0_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(w_1_b,r_0_b,As_b)),
        thrust::make_zip_iterator(thrust::make_tuple(w_1_b,r_0_b,As_b))+N,
        cusp::krylov::detail_m::KERNEL_W<ScalarType>(beta_0));
}

// compute w_1 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_w_1_m(const Array1& r_0, const Array2& As, Array3& w_1,
                   ScalarType beta_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(r_0,As,w_1);

    // compute
    cusp::krylov::trans_m::compute_w_1_m(r_0.begin(),r_0.end(),
        As.begin(),w_1.begin(),beta_0);
}

// compute the new residual r_1 using iterators
// reuses detail_m::KERNEL_W with -\chi_0 as the coefficient
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_r_1_m(InputIterator1 w_1_b, InputIterator1 w_1_e,
                   InputIterator2 Aw_b, OutputIterator r_1_b,
                   ScalarType chi_0)
{
    size_t N = w_1_e-w_1_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(r_1_b,w_1_b,Aw_b)),
        thrust::make_zip_iterator(thrust::make_tuple(r_1_b,w_1_b,Aw_b))+N,
        cusp::krylov::detail_m::KERNEL_W<ScalarType>(-chi_0));
}

// compute the new residual r_1 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_r_1_m(const Array1& w_1, const Array2& Aw, Array3& r_1,
                   ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(w_1,Aw,r_1);

    // compute
    cusp::krylov::trans_m::compute_r_1_m(w_1.begin(),w_1.end(),
        Aw.begin(),r_1.begin(),chi_0);
}

// compute s_0 using iterators
// uses detail_m::KERNEL_S
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator, typename ScalarType>
void compute_s_0_m(InputIterator1 r_1_b, InputIterator1 r_1_e,
                   InputIterator2 As_b, OutputIterator s_0_b,
                   ScalarType alpha_1, ScalarType chi_0)
{
    size_t N = r_1_e-r_1_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(s_0_b,r_1_b,As_b)),
        thrust::make_zip_iterator(thrust::make_tuple(s_0_b,r_1_b,As_b))+N,
        cusp::krylov::detail_m::KERNEL_S<ScalarType>(alpha_1,chi_0));
}

// compute s_0 using arrays
template <typename Array1, typename Array2, typename Array3,
          typename ScalarType>
void compute_s_0_m(const Array1& r_1, const Array2& As, Array3& s_0,
                   ScalarType alpha_1, ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(r_1,As,s_0);

    // compute
    cusp::krylov::trans_m::compute_s_0_m(r_1.begin(),r_1.end(),
        As.begin(),s_0.begin(),alpha_1,chi_0);
}

// compute \chi_0^\sigma, \rho_1^\sigma using iterators
// uses detail_m::KERNEL_CHIRHO
template <typename InputIterator1, typename InputIterator2,
          typename OutputIterator1, typename OutputIterator2,
          typename ScalarType>
void compute_chirho_m(InputIterator1 rho_0_s_b, InputIterator1 rho_0_s_e,
                      InputIterator2 sigma_b,
                      OutputIterator1 chi_0_s_b, OutputIterator2 rho_1_s_b,
                      ScalarType chi_0)
{
    size_t N = rho_0_s_e-rho_0_s_b;
    thrust::for_each(
        thrust::make_zip_iterator(thrust::make_tuple(chi_0_s_b,rho_1_s_b,rho_0_s_b,sigma_b)),
        thrust::make_zip_iterator(thrust::make_tuple(chi_0_s_b,rho_1_s_b,rho_0_s_b,sigma_b))+N,
        cusp::krylov::detail_m::KERNEL_CHIRHO<ScalarType>(chi_0));
}

// compute \chi_0^\sigma, \rho_1^\sigma using arrays
template <typename Array1, typename Array2, typename Array3, typename Array4,
          typename ScalarType>
void compute_chirho_m(const Array1& rho_0_s, const Array2& sigma,
                      Array3& chi_0_s, Array4& rho_1_s, ScalarType chi_0)
{
    // sanity checks
    cusp::blas::detail::assert_same_dimensions(sigma,rho_0_s,rho_1_s);
    cusp::blas::detail::assert_same_dimensions(chi_0_s,rho_1_s);

    // compute
    cusp::krylov::trans_m::compute_chirho_m(rho_0_s.begin(),rho_0_s.end(),
        sigma.begin(),chi_0_s.begin(),rho_1_s.begin(),chi_0);
}

// multiple copy of array to another array
// this is just a vectorization of blas::copy
// uses detail_m::KERNEL_VCOPY
template <typename Array1, typename Array2>
void vectorize_copy(const Array1& source, Array2& dest)
{
    // sanity check
    size_t N = source.end()-source.begin();
    size_t N_t = dest.end()-dest.begin();
    assert ( N_t%N == 0 );

    // counting iterators to pass to thrust::transform
    thrust::counting_iterator<int> counter(0);

    // pointer to data
    typedef typename Array1::value_type ScalarType;
    const ScalarType *raw_ptr_source = thrust::raw_pointer_cast(source.data());

    // compute
    thrust::transform(counter,counter+N_t,dest.begin(),
        cusp::krylov::detail_m::KERNEL_VCOPY<ScalarType>(N,raw_ptr_source));
}
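
/*
 * Example of the replication performed by vectorize_copy (illustrative; the
 * concrete sizes are assumptions): for source = [a, b, c] (N = 3) and a
 * destination of length N_t = 9 the result is [a, b, c, a, b, c, a, b, c],
 * i.e. dest[j] = source[j % N].  bicgstab_m below uses this to seed every
 * shifted search vector s_0^\sigma with the right-hand side b.
 */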

} // end namespace trans_m

// BiCGStab-M routine that uses the default monitor to determine completion
template <class LinearOperator,
          class VectorType1, class VectorType2, class VectorType3>
void bicgstab_m(LinearOperator& A,
                VectorType1& x, VectorType2& b, VectorType3& sigma)
{
    typedef typename LinearOperator::value_type ValueType;

    cusp::default_monitor<ValueType> monitor(b);

    return bicgstab_m(A, x, b, sigma, monitor);
}
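
/*
 * Callers who want explicit control over the stopping criterion can pass a
 * monitor to the overload below instead.  A sketch (the iteration limit and
 * tolerance shown are arbitrary assumptions):
 *
 *   cusp::default_monitor<ValueType> monitor(b, 500, 1e-6);
 *   cusp::krylov::bicgstab_m(A, x, b, sigma, monitor);
 */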

// BiCGStab-M routine that takes a user-specified monitor
template <class LinearOperator,
          class VectorType1, class VectorType2, class VectorType3,
          class Monitor>
void bicgstab_m(LinearOperator& A,
                VectorType1& x, VectorType2& b, VectorType3& sigma,
                Monitor& monitor)
{
    CUSP_PROFILE_SCOPED();

    //
    // This bit is initialization of the solver.
    //

    // shorthand for typenames
    typedef typename LinearOperator::value_type ValueType;
    typedef typename LinearOperator::memory_space MemorySpace;

    // sanity checking
    const size_t N = A.num_rows;
    const size_t N_t = x.end()-x.begin();
    const size_t test = b.end()-b.begin();
    const size_t N_s = sigma.end()-sigma.begin();

    assert(A.num_rows == A.num_cols);
    assert(N_t == N*N_s);
    assert(N == test);

    //clock_t start = clock();

    // w has data used in computing the solution
    cusp::array1d<ValueType,MemorySpace> w_1(N);
    cusp::array1d<ValueType,MemorySpace> w_0(N);

    // stores residuals
    cusp::array1d<ValueType,MemorySpace> r_0(N);
    cusp::array1d<ValueType,MemorySpace> r_1(N);

    // used in iterates
    cusp::array1d<ValueType,MemorySpace> s_0(N);
    cusp::array1d<ValueType,MemorySpace> s_0_s(N_t);

    // stores parameters used in the iteration
    cusp::array1d<ValueType,MemorySpace> z_m1_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> z_0_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> z_1_s(N_s);

    cusp::array1d<ValueType,MemorySpace> alpha_0_s(N_s,ValueType(0));
    cusp::array1d<ValueType,MemorySpace> beta_0_s(N_s);

    cusp::array1d<ValueType,MemorySpace> rho_0_s(N_s,ValueType(1));
    cusp::array1d<ValueType,MemorySpace> rho_1_s(N_s);
    cusp::array1d<ValueType,MemorySpace> chi_0_s(N_s);

    // stores parameters used in the iteration for the undeformed system
    ValueType beta_m1, beta_0(ValueType(1));
    ValueType alpha_0(ValueType(0));

    ValueType delta_0, delta_1;
    ValueType phi_0;
    ValueType chi_0;

    // stores the value of the matrix-vector product we have to compute
    cusp::array1d<ValueType,MemorySpace> As(N);
    cusp::array1d<ValueType,MemorySpace> Aw(N);

    // set up the initial conditions for the iteration
    cusp::blas::copy(b,r_0);
    cusp::blas::copy(b,w_1);
    cusp::blas::copy(w_1,w_0);

    // set up the initial guess
    cusp::blas::fill(x,ValueType(0));

    // set up initial value of s_0 and s_0^\sigma
    cusp::krylov::trans_m::vectorize_copy(b,s_0_s);
    cusp::blas::copy(b,s_0);
    cusp::multiply(A,s_0,As);

    delta_1 = cusp::blas::dotc(w_0,r_0);
    phi_0 = cusp::blas::dotc(w_0,As)/delta_1;

    //
    // Initialization is done. Solve iteratively
    //
    while (!monitor.finished(r_0))
    {
        // recycle iterates
        beta_m1 = beta_0;
        beta_0 = ValueType(-1.0)/phi_0;
        delta_0 = delta_1;

        // compute \zeta_1^\sigma, \beta_0^\sigma
        cusp::krylov::trans_m::compute_zb_m(z_0_s, z_m1_s, sigma, z_1_s, beta_0_s,
            beta_m1, beta_0, alpha_0);

        // call w_1 kernel
        cusp::krylov::trans_m::compute_w_1_m(r_0, As, w_1, beta_0);

        // compute the matrix-vector product Aw
        cusp::multiply(A,w_1,Aw);

        // compute chi_0
        chi_0 = cusp::blas::dotc(Aw,w_1)/cusp::blas::dotc(Aw,Aw);

        // compute new residual
        cusp::krylov::trans_m::compute_r_1_m(w_1,Aw,r_1,chi_0);

        // compute the new delta
        delta_1 = cusp::blas::dotc(w_0,r_1);

        // compute new alpha
        alpha_0 = -beta_0*delta_1/delta_0/chi_0;

        // compute s_0
        cusp::krylov::trans_m::compute_s_0_m(r_1,As,s_0,alpha_0,chi_0);

        // compute As
        cusp::multiply(A,s_0,As);

        // compute new phi
        phi_0 = cusp::blas::dotc(w_0,As)/delta_1;

        // compute shifted rho, chi
        cusp::krylov::trans_m::compute_chirho_m(rho_0_s,sigma,chi_0_s,rho_1_s,
            chi_0);

        // calculate \alpha_0^\sigma
        cusp::krylov::trans_m::compute_a_m(z_0_s, z_1_s, beta_0_s,
            alpha_0_s, beta_0, alpha_0);

        // compute the new solution and s_0^\sigma
        cusp::krylov::trans_m::compute_xs_m(beta_0_s, chi_0_s, rho_0_s, z_0_s,
            alpha_0_s, rho_1_s, z_1_s, r_0, r_1, w_1, s_0_s, x);

        // recycle r_i
        cusp::blas::copy(r_1,r_0);

        // recycle \zeta_i^\sigma
        cusp::blas::copy(z_0_s,z_m1_s);
        cusp::blas::copy(z_1_s,z_0_s);

        // recycle \rho_i^\sigma
        cusp::blas::copy(rho_1_s,rho_0_s);

        ++monitor;

    } // finished iteration

} // end bicgstab_m

} // end namespace krylov
} // end namespace cusp
