/[escript]/branches/diaplayground/cusplibrary/cusp/krylov/detail/gmres.inl
ViewVC logotype

Contents of /branches/diaplayground/cusplibrary/cusp/krylov/detail/gmres.inl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4955 - (show annotations)
Tue May 20 04:33:15 2014 UTC (6 years, 4 months ago) by caltinay
File size: 6028 byte(s)
added pristine copy of cusplibrary (apache license) to be used by ripley.

1 /*
2 * Copyright 2011 The Regents of the University of California
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <cusp/array1d.h>
17 #include <cusp/blas.h>
18 #include <cusp/multiply.h>
19 #include <cusp/monitor.h>
20 #include <cusp/linear_operator.h>
21
22 namespace blas = cusp::blas;
23 namespace cusp
24 {
25 namespace krylov
26 {
27 template <typename ValueType>
28 void ApplyPlaneRotation(ValueType& dx,
29 ValueType& dy,
30 ValueType& cs,
31 ValueType& sn)
32 {
33 ValueType temp = cs * dx + sn *dy;
34 dy = -sn*dx+cs*dy;
35 dx = temp;
36 }
37
38 template <typename ValueType>
39 void GeneratePlaneRotation(ValueType& dx,
40 ValueType& dy,
41 ValueType& cs,
42 ValueType& sn)
43 {
44 if(dy == ValueType(0.0)){
45 cs = 1.0;
46 sn = 0.0;
47 }else if (abs(dy) > abs(dx)) {
48 ValueType tmp = dx / dy;
49 sn = ValueType(1.0) / sqrt(ValueType(1.0) + tmp*tmp);
50 cs = tmp*sn;
51 }else {
52 ValueType tmp = dy / dx;
53 cs = ValueType(1.0) / sqrt(ValueType(1.0) + tmp*tmp);
54 sn = tmp*cs;
55 }
56 }
57
58 template <class LinearOperator,typename ValueType>
59 void PlaneRotation(LinearOperator& H,
60 ValueType& cs,
61 ValueType& sn,
62 ValueType& s,
63 int i)
64 {
65 for (int k = 0; k < i; k++){
66 ApplyPlaneRotation(H(k,i), H(k+1,i), cs[k], sn[k]);
67 }
68 GeneratePlaneRotation(H(i,i), H(i+1,i), cs[i], sn[i]);
69 ApplyPlaneRotation(H(i,i), H(i+1,i), cs[i], sn[i]);
70 ApplyPlaneRotation(s[i], s[i+1], cs[i], sn[i]);
71 }
72
73 template <class LinearOperator,
74 class Vector>
75 void gmres(LinearOperator& A,
76 Vector& x,
77 Vector& b,
78 const size_t restart)
79 {
80 typedef typename LinearOperator::value_type ValueType;
81 cusp::default_monitor<ValueType> monitor(b);
82 cusp::krylov::gmres(A, x, b, restart, monitor);
83 }
84
85 template <class LinearOperator,
86 class Vector,
87 class Monitor>
88 void gmres(LinearOperator& A,
89 Vector& x,
90 Vector& b,
91 const size_t restart,
92 Monitor& monitor)
93 {
94 typedef typename LinearOperator::value_type ValueType;
95 typedef typename LinearOperator::memory_space MemorySpace;
96 cusp::identity_operator<ValueType,MemorySpace> M(A.num_rows, A.num_cols);
97 cusp::krylov::gmres(A, x, b, restart, monitor, M);
98 }
99
100 template <class LinearOperator,
101 class Vector,
102 class Monitor,
103 class Preconditioner>
104 void gmres(LinearOperator& A,
105 Vector& x,
106 Vector& b,
107 const size_t restart,
108 Monitor& monitor,
109 Preconditioner& M)
110 {
111 typedef typename LinearOperator::value_type ValueType;
112 typedef typename LinearOperator::memory_space MemorySpace;
113 typedef typename norm_type<ValueType>::type NormType;
114 assert(A.num_rows == A.num_cols); // sanity check
115 const size_t N = A.num_rows;
116 const int R = restart;
117 int i, j, k;
118 NormType beta = 0;
119 cusp::array1d<NormType,cusp::host_memory> resid(1);
120 //allocate workspace
121 cusp::array1d<ValueType,MemorySpace> w(N);
122 cusp::array1d<ValueType,MemorySpace> V0(N); //Arnoldi matrix pos 0
123 cusp::array2d<ValueType,MemorySpace,cusp::column_major> V(N,R+1,ValueType(0.0)); //Arnoldi matrix
124 //duplicate copy of s on GPU
125 cusp::array1d<ValueType,MemorySpace> sDev(R+1);
126 //HOST WORKSPACE
127 cusp::array2d<ValueType,cusp::host_memory,cusp::column_major> H(R+1, R); //Hessenberg matrix
128 cusp::array1d<ValueType,cusp::host_memory> s(R+1);
129 cusp::array1d<ValueType,cusp::host_memory> cs(R);
130 cusp::array1d<ValueType,cusp::host_memory> sn(R);
131 do{
132 // compute initial residual and its norm //
133 cusp::multiply(A, x, w); // V(0) = A*x //
134 blas::axpy(b,w,ValueType(-1)); // V(0) = V(0) - b //
135 cusp::multiply(M,w,w); // V(0) = M*V(0) //
136 beta = blas::nrm2(w); // beta = norm(V(0)) //
137 blas::scal(w, ValueType(-1.0/beta)); // V(0) = -V(0)/beta //
138 blas::copy(w,V.column(0));
139 //s = 0 //
140 blas::fill(s,ValueType(0.0));
141 s[0] = beta;
142 i = -1;
143 resid[0] = abs(s[0]);
144 if (monitor.finished(resid)){
145 break;
146 }
147
148 do{
149 ++i;
150 ++monitor;
151
152 //apply preconditioner
153 //can't pass in ref to column in V so need to use copy (w)
154 cusp::multiply(A,w,V0);
155 //V(i+1) = A*w = M*A*V(i) //
156 cusp::multiply(M,V0,w);
157
158 for (k = 0; k <= i; k++){
159 // H(k,i) = <V(i+1),V(k)> //
160 H(k, i) = blas::dotc(w, V.column(k));
161 // V(i+1) -= H(k, i) * V(k) //
162 blas::axpy(V.column(k),w,-H(k,i));
163 }
164
165 H(i+1,i) = blas::nrm2(w);
166 // V(i+1) = V(i+1) / H(i+1, i) //
167 blas::scal(w,ValueType(1.0)/H(i+1,i));
168 blas::copy(w,V.column(i+1));
169
170 PlaneRotation(H,cs,sn,s,i);
171
172 resid[0] = abs(s[i+1]);
173
174 //check convergence condition
175 if (monitor.finished(resid)){
176 break;
177 }
178 }while (i+1 < R && monitor.iteration_count()+1 <= monitor.iteration_limit());
179
180
181 // solve upper triangular system in place //
182 for (j = i; j >= 0; j--){
183 s[j] /= H(j,j);
184 //S(0:j) = s(0:j) - s[j] H(0:j,j)
185 for (k = j-1; k >= 0; k--){
186 s[k] -= H(k,j) * s[j];
187 }
188 }
189
190 // update the solution //
191
192 //copy s to gpu
193 blas::copy(s,sDev);
194 // x= V(1:N,0:i)*s(0:i)+x //
195 for (j = 0; j <= i; j++){
196 // x = x + s[j] * V(j) //
197 blas::axpy(V.column(j),x,s[j]);
198 }
199 } while (!monitor.finished(resid));
200 }
201 } // end namespace krylov
202 } // end namespace cusp

  ViewVC Help
Powered by ViewVC 1.1.26