/[escript]/trunk/cusplibrary/cusp/detail/host/transposed_spmv.h
ViewVC logotype

Contents of /trunk/cusplibrary/cusp/detail/host/transposed_spmv.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5221 - (show annotations)
Thu Oct 23 04:01:49 2014 UTC (7 years, 9 months ago) by caltinay
File MIME type: text/plain
File size: 6545 byte(s)
Actually implement transposed SpMV for blocksizes >=3

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #pragma once
18
19 #include <thrust/functional.h>
20 #include <cusp/detail/functional.h>
21
22 #ifndef DIA_CHUNKSIZE
23 #define DIA_CHUNKSIZE 1024
24 #endif
25
26 namespace cusp
27 {
28 namespace detail
29 {
30 namespace host
31 {
32
33 /////////////////////////
34 // DIA transposed SpMV //
35 /////////////////////////
36 template <typename Matrix,
37 typename Vector1,
38 typename Vector2,
39 typename UnaryFunction,
40 typename BinaryFunction1,
41 typename BinaryFunction2>
42 void transposed_spmv_dia(const Matrix& A,
43 const Vector1& x,
44 Vector2& y,
45 UnaryFunction initialize,
46 BinaryFunction1 combine,
47 BinaryFunction2 reduce)
48 {
49 typedef typename Matrix::index_type IndexType;
50 //typedef typename Vector2::value_type ValueType;
51
52 const size_t num_diagonals = A.values.num_cols;
53
54 #pragma omp parallel for
55 for (size_t ch = 0; ch < A.num_cols; ch += DIA_CHUNKSIZE) {
56 // initialize chunk
57 for (size_t row = ch; row < std::min(ch+DIA_CHUNKSIZE,A.num_cols); row++)
58 {
59 y[row] = initialize(y[row]);
60 }
61 // for each diagonal
62 for (size_t d = 0; d < num_diagonals; d++)
63 {
64 for (IndexType row=ch; row<std::min(ch+DIA_CHUNKSIZE,A.num_cols); row++)
65 {
66 const IndexType col = row - A.diagonal_offsets[d];
67 if (col >= 0 && col < A.num_rows)
68 {
69 y[row] = reduce(y[row], combine(A.values(col, d), x[col]));
70 }
71 }
72 }
73 }
74 }
75
76 template <typename Matrix,
77 typename Vector1,
78 typename Vector2>
79 void transposed_spmv_dia(const Matrix& A,
80 const Vector1& x,
81 Vector2& y)
82 {
83 typedef typename Vector2::value_type ValueType;
84
85 transposed_spmv_dia(A, x, y,
86 cusp::detail::zero_function<ValueType>(),
87 thrust::multiplies<ValueType>(),
88 thrust::plus<ValueType>());
89 }
90
91 template <typename Matrix,
92 typename Vector1,
93 typename Vector2,
94 typename UnaryFunction,
95 typename BinaryFunction1,
96 typename BinaryFunction2>
97 void transposed_spmv_cds(const Matrix& A,
98 const Vector1& x,
99 Vector2& y,
100 UnaryFunction initialize,
101 BinaryFunction1 combine,
102 BinaryFunction2 reduce)
103 {
104 typedef typename Matrix::index_type IndexType;
105 typedef typename Vector2::value_type ValueType;
106
107 const IndexType num_diagonals = A.diagonal_offsets.size();
108 const IndexType block_size = (IndexType)A.block_size;
109 const IndexType num_cols = (IndexType)A.num_cols;
110 // make chunksize a multiple of block_size
111 const IndexType chunksize = block_size*(DIA_CHUNKSIZE/block_size);
112
113 // optimisation for special case
114 if (block_size == 2) {
115 #pragma omp parallel for
116 for (IndexType ch = 0; ch < num_cols; ch += chunksize) {
117 for (IndexType row = ch; row < std::min(ch+chunksize,num_cols); row+=2)
118 {
119 ValueType sum1 = initialize(y[row]);
120 ValueType sum2 = initialize(y[row+1]);
121 // for each diagonal block
122 for (IndexType d = 0; d < num_diagonals; d++)
123 {
124 const IndexType col = row - A.diagonal_offsets[d]*2;
125 if (col >= 0 && col < A.num_rows)
126 {
127 sum1 = reduce(sum1, combine(A.values(col, 2*d), x[col]));
128 sum2 = reduce(sum2, combine(A.values(col, 2*d+1),x[col]));
129 sum1 = reduce(sum1, combine(A.values(col+1, 2*d), x[col+1]));
130 sum2 = reduce(sum2, combine(A.values(col+1, 2*d+1),x[col+1]));
131 }
132 }
133 y[row] = sum1;
134 y[row+1] = sum2;
135 }
136 }
137 } else { // block_size!=2
138 #pragma omp parallel for
139 for (IndexType ch = 0; ch < num_cols; ch += chunksize) {
140 for (IndexType row = ch; row < std::min(ch+chunksize,num_cols); row++)
141 {
142 y[row] = initialize(y[row]);
143
144 // for each diagonal block
145 for (IndexType d = 0; d < num_diagonals; d++)
146 {
147 const IndexType k = A.diagonal_offsets[d]*block_size;
148 const IndexType col = block_size*(row/block_size) - k;
149 if (col >= 0 && col <= A.num_rows-block_size)
150 {
151 // for each column in block
152 for (IndexType i = 0; i < block_size; i++)
153 {
154 const ValueType& Aij = A.values(col+i, d*block_size+row%block_size);
155 const ValueType& xj = x[col + i];
156 y[row] = reduce(y[row], combine(Aij, xj));
157 }
158 }
159 } // diagonals
160 } // rows
161 } // chunks
162 } // block_size
163 }
164
165 template <typename Matrix,
166 typename Vector1,
167 typename Vector2>
168 void transposed_spmv_cds(const Matrix& A,
169 const Vector1& x,
170 Vector2& y)
171 {
172 typedef typename Vector2::value_type ValueType;
173
174 if (A.block_size == 1) {
175 transposed_spmv_dia(A, x, y,
176 cusp::detail::zero_function<ValueType>(),
177 thrust::multiplies<ValueType>(),
178 thrust::plus<ValueType>());
179 } else {
180 transposed_spmv_cds(A, x, y,
181 cusp::detail::zero_function<ValueType>(),
182 thrust::multiplies<ValueType>(),
183 thrust::plus<ValueType>());
184 }
185 }
186
187 } // end namespace host
188 } // end namespace detail
189 } // end namespace cusp
190
191

  ViewVC Help
Powered by ViewVC 1.1.26