/[escript]/trunk/cusplibrary/cusp/detail/host/spmv.h
ViewVC logotype

Contents of /trunk/cusplibrary/cusp/detail/host/spmv.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5220 - (show annotations)
Thu Oct 23 03:50:37 2014 UTC (5 years, 11 months ago) by caltinay
File MIME type: text/plain
File size: 16684 byte(s)
Fixes to CDS SpMV symmetric and tests for blocksizes 1-4 symm/non-symm.

1 #pragma once
2
3 #include <thrust/functional.h>
4 #include <cusp/detail/functional.h>
5
6 #ifndef DIA_CHUNKSIZE
7 #define DIA_CHUNKSIZE 1024
8 #endif
9
10 //MW: add some OpenMP pragmas
11 namespace cusp
12 {
13 namespace detail
14 {
15 namespace host
16 {
17
18 //////////////
19 // COO SpMV //
20 //////////////
21 template <typename Matrix,
22 typename Vector1,
23 typename Vector2,
24 typename UnaryFunction,
25 typename BinaryFunction1,
26 typename BinaryFunction2>
27 void spmv_coo(const Matrix& A,
28 const Vector1& x,
29 Vector2& y,
30 UnaryFunction initialize,
31 BinaryFunction1 combine,
32 BinaryFunction2 reduce)
33 {
34 typedef typename Matrix::index_type IndexType;
35 typedef typename Vector2::value_type ValueType;
36
37 for(size_t i = 0; i < A.num_rows; i++)
38 y[i] = initialize(y[i]);
39
40 for(size_t n = 0; n < A.num_entries; n++)
41 {
42 const IndexType& i = A.row_indices[n];
43 const IndexType& j = A.column_indices[n];
44 const ValueType& Aij = A.values[n];
45 const ValueType& xj = x[j];
46
47 y[i] = reduce(y[i], combine(Aij, xj));
48 }
49 }
50
51 template <typename Matrix,
52 typename Vector1,
53 typename Vector2>
54 void spmv_coo(const Matrix& A,
55 const Vector1& x,
56 Vector2& y)
57 {
58 typedef typename Vector2::value_type ValueType;
59
60 spmv_coo(A, x, y,
61 cusp::detail::zero_function<ValueType>(),
62 thrust::multiplies<ValueType>(),
63 thrust::plus<ValueType>());
64 }
65
66
67 //////////////
68 // CSR SpMV //
69 //////////////
70 template <typename Matrix,
71 typename Vector1,
72 typename Vector2,
73 typename UnaryFunction,
74 typename BinaryFunction1,
75 typename BinaryFunction2>
76 void spmv_csr(const Matrix& A,
77 const Vector1& x,
78 Vector2& y,
79 UnaryFunction initialize,
80 BinaryFunction1 combine,
81 BinaryFunction2 reduce)
82 {
83 typedef typename Matrix::index_type IndexType;
84 typedef typename Vector2::value_type ValueType;
85
86 #pragma omp parallel for
87 for(size_t i = 0; i < A.num_rows; i++)
88 {
89 const IndexType& row_start = A.row_offsets[i];
90 const IndexType& row_end = A.row_offsets[i+1];
91
92 ValueType accumulator = initialize(y[i]);
93
94 for (IndexType jj = row_start; jj < row_end; jj++)
95 {
96 const IndexType& j = A.column_indices[jj];
97 const ValueType& Aij = A.values[jj];
98 const ValueType& xj = x[j];
99
100 accumulator = reduce(accumulator, combine(Aij, xj));
101 }
102
103 y[i] = accumulator;
104 }
105 }
106
107
108 template <typename Matrix,
109 typename Vector1,
110 typename Vector2>
111 void spmv_csr(const Matrix& A,
112 const Vector1& x,
113 Vector2& y)
114 {
115 typedef typename Vector2::value_type ValueType;
116
117 spmv_csr(A, x, y,
118 cusp::detail::zero_function<ValueType>(),
119 thrust::multiplies<ValueType>(),
120 thrust::plus<ValueType>());
121 }
122
123
124 //////////////
125 // DIA SpMV //
126 //////////////
127 template <typename Matrix,
128 typename Vector1,
129 typename Vector2,
130 typename UnaryFunction,
131 typename BinaryFunction1,
132 typename BinaryFunction2>
133 void spmv_dia(const Matrix& A,
134 const Vector1& x,
135 Vector2& y,
136 UnaryFunction initialize,
137 BinaryFunction1 combine,
138 BinaryFunction2 reduce)
139 {
140 typedef typename Matrix::index_type IndexType;
141 //typedef typename Vector2::value_type ValueType;
142
143 const size_t num_diagonals = A.values.num_cols;
144
145 if (A.symmetric) {
146 #pragma omp parallel for
147 for (size_t ch = 0; ch < A.num_rows; ch += DIA_CHUNKSIZE) {
148 for (size_t row = ch; row < std::min(ch+DIA_CHUNKSIZE,A.num_rows); row++)
149 {
150 // initialize
151 y[row] = initialize(y[row]);
152
153 // process subdiagonals
154 for (size_t d = 0; d < num_diagonals; d++)
155 {
156 const size_t diag = num_diagonals-d-1;
157 // skip main diagonal which is handled below
158 if (A.diagonal_offsets[diag] == 0)
159 continue;
160 const IndexType col = row - A.diagonal_offsets[diag];
161 if (col >= 0 && col < A.num_rows)
162 {
163 y[row] = reduce(y[row], combine(A.values(col, diag), x[col]));
164 }
165 }
166 // process main and upper diagonals
167 for (size_t d = 0; d < num_diagonals; d++)
168 {
169 const IndexType col = row + A.diagonal_offsets[d];
170 if (col >= 0 && col < A.num_cols)
171 {
172 y[row] = reduce(y[row], combine(A.values(row, d), x[col]));
173 }
174 }
175 }
176 }
177 } else {
178 #pragma omp parallel for
179 for (size_t ch = 0; ch < A.num_rows; ch += DIA_CHUNKSIZE) {
180 // initialize chunk
181 for (size_t row = ch; row < std::min(ch+DIA_CHUNKSIZE,A.num_rows); row++)
182 {
183 y[row] = initialize(y[row]);
184 }
185 // for each diagonal
186 for (size_t d = 0; d < num_diagonals; d++)
187 {
188 for (IndexType row=ch; row<std::min(ch+DIA_CHUNKSIZE,A.num_rows); row++)
189 {
190 const IndexType col = row + A.diagonal_offsets[d];
191 if (col >= 0 && col < A.num_cols)
192 {
193 y[row] = reduce(y[row], combine(A.values(row, d), x[col]));
194 }
195 }
196 }
197 }
198 }
199 }
200
201 template <typename Matrix,
202 typename Vector1,
203 typename Vector2>
204 void spmv_dia(const Matrix& A,
205 const Vector1& x,
206 Vector2& y)
207 {
208 typedef typename Vector2::value_type ValueType;
209
210 spmv_dia(A, x, y,
211 cusp::detail::zero_function<ValueType>(),
212 thrust::multiplies<ValueType>(),
213 thrust::plus<ValueType>());
214 }
215
216
217 //////////////
218 // CDS SpMV //
219 //////////////
220 template <typename Matrix,
221 typename Vector1,
222 typename Vector2,
223 typename UnaryFunction,
224 typename BinaryFunction1,
225 typename BinaryFunction2>
226 void spmv_cds(const Matrix& A,
227 const Vector1& x,
228 Vector2& y,
229 UnaryFunction initialize,
230 BinaryFunction1 combine,
231 BinaryFunction2 reduce)
232 {
233 typedef typename Matrix::index_type IndexType;
234 typedef typename Vector2::value_type ValueType;
235
236 const IndexType num_diagonals = A.diagonal_offsets.size();
237 const IndexType block_size = (IndexType)A.block_size;
238 const IndexType num_rows = (IndexType)A.num_rows;
239 // make chunksize a multiple of block_size
240 const IndexType chunksize = block_size*(DIA_CHUNKSIZE/block_size);
241
242 // optimization for special case
243 if (block_size == 2) {
244 if (A.symmetric) {
245 // if there is a main diagonal block, it is the first in offsets
246 // and should be skipped in the first loop below since the main
247 // diagonal is processed in the second loop
248 const IndexType d0 = (A.diagonal_offsets[0] == 0 ? 1 : 0);
249 #pragma omp parallel for
250 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
251 {
252 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row+=2)
253 {
254 ValueType sum1 = initialize(y[row]);
255 ValueType sum2 = initialize(y[row+1]);
256 // process subdiagonal blocks
257 for(IndexType d = 0; d < num_diagonals-d0; d++)
258 {
259 const IndexType diag = num_diagonals-d-1;
260 const IndexType col = row - A.diagonal_offsets[diag]*2;
261 if (col >= 0 && col <= num_rows-2)
262 {
263 sum1 = reduce(sum1,combine(A.values(col, 2*diag), x[col]));
264 sum2 = reduce(sum2,combine(A.values(col, 2*diag+1),x[col]));
265 sum1 = reduce(sum1,combine(A.values(col+1,2*diag), x[col+1]));
266 sum2 = reduce(sum2,combine(A.values(col+1,2*diag+1),x[col+1]));
267 }
268 }
269 // process main and upper diagonal blocks
270 for(IndexType d = 0; d < num_diagonals; d++)
271 {
272 const IndexType col = row + A.diagonal_offsets[d]*2;
273 if (col >= 0 && col <= num_rows-2)
274 {
275 sum1 = reduce(sum1,combine(A.values(row, 2*d), x[col]));
276 sum2 = reduce(sum2,combine(A.values(row+1,2*d), x[col]));
277 sum1 = reduce(sum1,combine(A.values(row, 2*d+1),x[col+1]));
278 sum2 = reduce(sum2,combine(A.values(row+1,2*d+1),x[col+1]));
279 }
280 }
281 y[row] = sum1;
282 y[row+1] = sum2;
283 }
284 }
285 } else { // A.symmetric
286 #pragma omp parallel for
287 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
288 {
289 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row+=2)
290 {
291 ValueType sum1 = initialize(y[row]);
292 ValueType sum2 = initialize(y[row+1]);
293 // for each diagonal block
294 for(IndexType d = 0; d < num_diagonals; d++)
295 {
296 const IndexType col = row + A.diagonal_offsets[d]*2;
297 if (col >= 0 && col <= num_rows-2)
298 {
299 sum1 = reduce(sum1,combine(A.values(row, 2*d), x[col]));
300 sum2 = reduce(sum2,combine(A.values(row+1,2*d), x[col]));
301 sum1 = reduce(sum1,combine(A.values(row, 2*d+1),x[col+1]));
302 sum2 = reduce(sum2,combine(A.values(row+1,2*d+1),x[col+1]));
303 }
304 }
305 y[row] = sum1;
306 y[row+1] = sum2;
307 }
308 }
309 } // A.symmetric
310 } else { // block size
311 if (A.symmetric) {
312 // if there is a main diagonal block, it is the first in offsets
313 // and should be skipped in the first loop below since the main
314 // diagonal is processed in the second loop
315 const IndexType d0 = (A.diagonal_offsets[0] == 0 ? 1 : 0);
316 #pragma omp parallel for
317 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
318 {
319 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row++)
320 {
321 y[row] = initialize(y[row]);
322
323 // process subdiagonal blocks
324 for (IndexType d = 0; d < num_diagonals-d0; d++)
325 {
326 const IndexType diag = num_diagonals-d-1;
327 const IndexType col = block_size*(row/block_size) - A.diagonal_offsets[diag]*block_size;
328 if (col >= 0 && col <= num_rows-block_size)
329 {
330 // for each column in block
331 for (IndexType i = 0; i < block_size; i++)
332 {
333 const ValueType& Aij = A.values(col+i, diag*block_size+row%block_size);
334 const ValueType& xj = x[col + i];
335
336 y[row] = reduce(y[row], combine(Aij, xj));
337 }
338 }
339 }
340 // process main and upper diagonal blocks
341 for (IndexType d = 0; d < num_diagonals; d++)
342 {
343 const IndexType col = block_size*(row/block_size) + A.diagonal_offsets[d]*block_size;
344 if (col >= 0 && col <= num_rows-block_size)
345 {
346 // for each column in block
347 for (IndexType i = 0; i < block_size; i++)
348 {
349 const ValueType& Aij = A.values(row, d*block_size+i);
350 const ValueType& xj = x[col + i];
351
352 y[row] = reduce(y[row], combine(Aij, xj));
353 }
354 }
355 }
356 }
357 }
358 } else { // A.symmetric
359 #pragma omp parallel for
360 for(IndexType ch = 0; ch < num_rows; ch+=chunksize)
361 {
362 for (IndexType row = ch; row<std::min(ch+chunksize,num_rows); row++)
363 {
364 y[row] = initialize(y[row]);
365 }
366
367 // for each diagonal block
368 for(IndexType d = 0; d < num_diagonals; d++)
369 {
370 const IndexType k = A.diagonal_offsets[d]*block_size;
371
372 for(IndexType row=ch; row<std::min(ch+chunksize,num_rows); row+=block_size)
373 {
374 const IndexType col = row + k;
375 if (col >= 0 && col <= num_rows-block_size)
376 {
377 // for each column in block
378 for (IndexType i = 0; i < block_size; i++)
379 {
380 // for each row in block
381 for (IndexType j = 0; j < block_size; j++)
382 {
383 const ValueType& Aij = A.values(row+j, d*block_size+i);
384 const ValueType& xj = x[col + i];
385
386 y[row+j] = reduce(y[row+j], combine(Aij, xj));
387 }
388 }
389 }
390 }
391 }
392 }
393 }
394 }
395 }
396
397 template <typename Matrix,
398 typename Vector1,
399 typename Vector2>
400 void spmv_cds(const Matrix& A,
401 const Vector1& x,
402 Vector2& y)
403 {
404 typedef typename Vector2::value_type ValueType;
405
406 if (A.block_size == 1) {
407 spmv_dia(A, x, y,
408 cusp::detail::zero_function<ValueType>(),
409 thrust::multiplies<ValueType>(),
410 thrust::plus<ValueType>());
411 } else {
412 spmv_cds(A, x, y,
413 cusp::detail::zero_function<ValueType>(),
414 thrust::multiplies<ValueType>(),
415 thrust::plus<ValueType>());
416 }
417 }
418
419
420 //////////////
421 // ELL SpMV //
422 //////////////
423 template <typename Matrix,
424 typename Vector1,
425 typename Vector2,
426 typename UnaryFunction,
427 typename BinaryFunction1,
428 typename BinaryFunction2>
429 void spmv_ell(const Matrix& A,
430 const Vector1& x,
431 Vector2& y,
432 UnaryFunction initialize,
433 BinaryFunction1 combine,
434 BinaryFunction2 reduce)
435 {
436 typedef typename Matrix::index_type IndexType;
437 typedef typename Vector2::value_type ValueType;
438
439 const size_t& num_entries_per_row = A.column_indices.num_cols;
440
441 const IndexType invalid_index = Matrix::invalid_index;
442
443 for(size_t i = 0; i < A.num_rows; i++)
444 y[i] = initialize(y[i]);
445
446 for(size_t n = 0; n < num_entries_per_row; n++)
447 {
448 for(size_t i = 0; i < A.num_rows; i++)
449 {
450 const IndexType& j = A.column_indices(i, n);
451 const ValueType& Aij = A.values(i,n);
452
453 if (j != invalid_index)
454 {
455 const ValueType& xj = x[j];
456 y[i] = reduce(y[i], combine(Aij, xj));
457 }
458 }
459 }
460 }
461
462
463 template <typename Matrix,
464 typename Vector1,
465 typename Vector2>
466 void spmv_ell(const Matrix& A,
467 const Vector1& x,
468 Vector2& y)
469 {
470 typedef typename Vector2::value_type ValueType;
471
472 spmv_ell(A, x, y,
473 cusp::detail::zero_function<ValueType>(),
474 thrust::multiplies<ValueType>(),
475 thrust::plus<ValueType>());
476 }
477
478 } // end namespace host
479 } // end namespace detail
480 } // end namespace cusp
481
482

  ViewVC Help
Powered by ViewVC 1.1.26