/[escript]/trunk/cusplibrary/cusp/detail/host/spmv.h
ViewVC logotype

Contents of /trunk/cusplibrary/cusp/detail/host/spmv.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File MIME type: text/plain
File size: 11143 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1 #pragma once
2
3 #include <thrust/functional.h>
4 #include <cusp/detail/functional.h>
5
6 #ifndef DIA_CHUNKSIZE
7 #define DIA_CHUNKSIZE 1024
8 #endif
9
10 //MW: add some OpenMP pragmas
11 namespace cusp
12 {
13 namespace detail
14 {
15 namespace host
16 {
17
18 //////////////
19 // COO SpMV //
20 //////////////
21 template <typename Matrix,
22 typename Vector1,
23 typename Vector2,
24 typename UnaryFunction,
25 typename BinaryFunction1,
26 typename BinaryFunction2>
27 void spmv_coo(const Matrix& A,
28 const Vector1& x,
29 Vector2& y,
30 UnaryFunction initialize,
31 BinaryFunction1 combine,
32 BinaryFunction2 reduce)
33 {
34 typedef typename Matrix::index_type IndexType;
35 typedef typename Vector2::value_type ValueType;
36
37 for(size_t i = 0; i < A.num_rows; i++)
38 y[i] = initialize(y[i]);
39
40 for(size_t n = 0; n < A.num_entries; n++)
41 {
42 const IndexType& i = A.row_indices[n];
43 const IndexType& j = A.column_indices[n];
44 const ValueType& Aij = A.values[n];
45 const ValueType& xj = x[j];
46
47 y[i] = reduce(y[i], combine(Aij, xj));
48 }
49 }
50
51 template <typename Matrix,
52 typename Vector1,
53 typename Vector2>
54 void spmv_coo(const Matrix& A,
55 const Vector1& x,
56 Vector2& y)
57 {
58 typedef typename Vector2::value_type ValueType;
59
60 spmv_coo(A, x, y,
61 cusp::detail::zero_function<ValueType>(),
62 thrust::multiplies<ValueType>(),
63 thrust::plus<ValueType>());
64 }
65
66
67 //////////////
68 // CSR SpMV //
69 //////////////
70 template <typename Matrix,
71 typename Vector1,
72 typename Vector2,
73 typename UnaryFunction,
74 typename BinaryFunction1,
75 typename BinaryFunction2>
76 void spmv_csr(const Matrix& A,
77 const Vector1& x,
78 Vector2& y,
79 UnaryFunction initialize,
80 BinaryFunction1 combine,
81 BinaryFunction2 reduce)
82 {
83 typedef typename Matrix::index_type IndexType;
84 typedef typename Vector2::value_type ValueType;
85
86 #pragma omp parallel for
87 for(size_t i = 0; i < A.num_rows; i++)
88 {
89 const IndexType& row_start = A.row_offsets[i];
90 const IndexType& row_end = A.row_offsets[i+1];
91
92 ValueType accumulator = initialize(y[i]);
93
94 for (IndexType jj = row_start; jj < row_end; jj++)
95 {
96 const IndexType& j = A.column_indices[jj];
97 const ValueType& Aij = A.values[jj];
98 const ValueType& xj = x[j];
99
100 accumulator = reduce(accumulator, combine(Aij, xj));
101 }
102
103 y[i] = accumulator;
104 }
105 }
106
107
108 template <typename Matrix,
109 typename Vector1,
110 typename Vector2>
111 void spmv_csr(const Matrix& A,
112 const Vector1& x,
113 Vector2& y)
114 {
115 typedef typename Vector2::value_type ValueType;
116
117 spmv_csr(A, x, y,
118 cusp::detail::zero_function<ValueType>(),
119 thrust::multiplies<ValueType>(),
120 thrust::plus<ValueType>());
121 }
122
123
124 //////////////
125 // DIA SpMV //
126 //////////////
127 template <typename Matrix,
128 typename Vector1,
129 typename Vector2,
130 typename UnaryFunction,
131 typename BinaryFunction1,
132 typename BinaryFunction2>
133 void spmv_dia(const Matrix& A,
134 const Vector1& x,
135 Vector2& y,
136 UnaryFunction initialize,
137 BinaryFunction1 combine,
138 BinaryFunction2 reduce)
139 {
140 typedef typename Matrix::index_type IndexType;
141 //typedef typename Vector2::value_type ValueType;
142
143 const size_t num_diagonals = A.values.num_cols;
144
145 #pragma omp parallel for
146 for (size_t ch = 0; ch < A.num_rows; ch += DIA_CHUNKSIZE) {
147 // initialize chunk
148 for (size_t row = ch; row < std::min(ch+DIA_CHUNKSIZE,A.num_rows); row++)
149 {
150 y[row] = initialize(y[row]);
151 }
152 // for each diagonal
153 for (size_t d = 0; d < num_diagonals; d++)
154 {
155 for (IndexType row=ch; row<std::min(ch+DIA_CHUNKSIZE,A.num_rows); row++)
156 {
157 const IndexType col = row + A.diagonal_offsets[d];
158 if (col >= 0 && col < A.num_cols)
159 {
160 y[row] = reduce(y[row], combine(A.values(row, d), x[col]));
161 }
162 }
163 }
164 }
165 /*
166 for(size_t i = 0; i < A.num_rows; i++)
167 y[i] = initialize(y[i]);
168
169 for(size_t i = 0; i < num_diagonals; i++)
170 {
171 const IndexType& k = A.diagonal_offsets[i];
172
173 const IndexType i_start = std::max<IndexType>(0, -k);
174 const IndexType j_start = std::max<IndexType>(0, k);
175
176 // number of elements to process in this diagonal
177 const IndexType N = std::min(A.num_rows - i_start, A.num_cols - j_start);
178
179 for(IndexType n = 0; n < N; n++)
180 {
181 const ValueType& Aij = A.values(i_start + n, i);
182
183 const ValueType& xj = x[j_start + n];
184 ValueType& yi = y[i_start + n];
185
186 yi = reduce(yi, combine(Aij, xj));
187 }
188 }
189 */
190 }
191
192 template <typename Matrix,
193 typename Vector1,
194 typename Vector2>
195 void spmv_dia(const Matrix& A,
196 const Vector1& x,
197 Vector2& y)
198 {
199 typedef typename Vector2::value_type ValueType;
200
201 spmv_dia(A, x, y,
202 cusp::detail::zero_function<ValueType>(),
203 thrust::multiplies<ValueType>(),
204 thrust::plus<ValueType>());
205 }
206
207
208 //////////////
209 // CDS SpMV //
210 //////////////
211 template <typename Matrix,
212 typename Vector1,
213 typename Vector2,
214 typename UnaryFunction,
215 typename BinaryFunction1,
216 typename BinaryFunction2>
217 void spmv_cds(const Matrix& A,
218 const Vector1& x,
219 Vector2& y,
220 UnaryFunction initialize,
221 BinaryFunction1 combine,
222 BinaryFunction2 reduce)
223 {
224 typedef typename Matrix::index_type IndexType;
225 typedef typename Vector2::value_type ValueType;
226
227 const IndexType num_diagonals = A.diagonal_offsets.size();
228 const IndexType block_size = (IndexType)A.block_size;
229 const IndexType num_rows = (IndexType)A.num_rows;
230 // make chunksize a multiple of block_size
231 const IndexType chunksize = block_size*(DIA_CHUNKSIZE/block_size);
232
233 // optimization for special case
234 if (block_size == 2) {
235 #pragma omp parallel for
236 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
237 {
238 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row+=2)
239 {
240 ValueType sum1 = initialize(y[row]);
241 ValueType sum2 = initialize(y[row+1]);
242 // for each diagonal block
243 for(IndexType d = 0; d < num_diagonals; d++)
244 {
245 const IndexType col = row + A.diagonal_offsets[d]*2;
246 if (col >= 0 && col <= num_rows)
247 {
248 sum1 = reduce(sum1,combine(A.values(row, 2*d), x[col]));
249 sum2 = reduce(sum2,combine(A.values(row+1,2*d), x[col]));
250 sum1 = reduce(sum1,combine(A.values(row, 2*d+1),x[col+1]));
251 sum2 = reduce(sum2,combine(A.values(row+1,2*d+1),x[col+1]));
252 }
253 }
254 y[row] = sum1;
255 y[row+1] = sum2;
256 }
257 }
258 } else {
259 #pragma omp parallel for
260 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
261 {
262 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row++)
263 {
264 y[row] = initialize(y[row]);
265 }
266
267 // for each diagonal block
268 for(IndexType d = 0; d < num_diagonals; d++)
269 {
270 const IndexType k = A.diagonal_offsets[d]*block_size;
271
272 for(IndexType row=ch; row<std::min(ch+chunksize,num_rows); row+=block_size)
273 {
274 const IndexType col = row + k;
275 if (col >= 0 && col <= num_rows-block_size)
276 {
277 // for each column in block
278 for (IndexType i = 0; i < block_size; i++)
279 {
280 // for each row in block
281 for (IndexType j = 0; j < block_size; j++)
282 {
283 const ValueType& Aij = A.values(row+j, d*block_size+i);
284 const ValueType& xj = x[col + i];
285
286 y[row+j] = reduce(y[row+j], combine(Aij, xj));
287 }
288 }
289 }
290 }
291 }
292 }
293 }
294 }
295
296 template <typename Matrix,
297 typename Vector1,
298 typename Vector2>
299 void spmv_cds(const Matrix& A,
300 const Vector1& x,
301 Vector2& y)
302 {
303 typedef typename Vector2::value_type ValueType;
304
305 if (A.block_size == 1) {
306 spmv_dia(A, x, y,
307 cusp::detail::zero_function<ValueType>(),
308 thrust::multiplies<ValueType>(),
309 thrust::plus<ValueType>());
310 } else {
311 spmv_cds(A, x, y,
312 cusp::detail::zero_function<ValueType>(),
313 thrust::multiplies<ValueType>(),
314 thrust::plus<ValueType>());
315 }
316 }
317
318
319 //////////////
320 // ELL SpMV //
321 //////////////
322 template <typename Matrix,
323 typename Vector1,
324 typename Vector2,
325 typename UnaryFunction,
326 typename BinaryFunction1,
327 typename BinaryFunction2>
328 void spmv_ell(const Matrix& A,
329 const Vector1& x,
330 Vector2& y,
331 UnaryFunction initialize,
332 BinaryFunction1 combine,
333 BinaryFunction2 reduce)
334 {
335 typedef typename Matrix::index_type IndexType;
336 typedef typename Vector2::value_type ValueType;
337
338 const size_t& num_entries_per_row = A.column_indices.num_cols;
339
340 const IndexType invalid_index = Matrix::invalid_index;
341
342 for(size_t i = 0; i < A.num_rows; i++)
343 y[i] = initialize(y[i]);
344
345 for(size_t n = 0; n < num_entries_per_row; n++)
346 {
347 for(size_t i = 0; i < A.num_rows; i++)
348 {
349 const IndexType& j = A.column_indices(i, n);
350 const ValueType& Aij = A.values(i,n);
351
352 if (j != invalid_index)
353 {
354 const ValueType& xj = x[j];
355 y[i] = reduce(y[i], combine(Aij, xj));
356 }
357 }
358 }
359 }
360
361
362 template <typename Matrix,
363 typename Vector1,
364 typename Vector2>
365 void spmv_ell(const Matrix& A,
366 const Vector1& x,
367 Vector2& y)
368 {
369 typedef typename Vector2::value_type ValueType;
370
371 spmv_ell(A, x, y,
372 cusp::detail::zero_function<ValueType>(),
373 thrust::multiplies<ValueType>(),
374 thrust::plus<ValueType>());
375 }
376
377 } // end namespace host
378 } // end namespace detail
379 } // end namespace cusp
380
381

  ViewVC Help
Powered by ViewVC 1.1.26