/[escript]/trunk/cusplibrary/cusp/detail/host/transposed_spmv.h
ViewVC logotype

Contents of /trunk/cusplibrary/cusp/detail/host/transposed_spmv.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File MIME type: text/plain
File size: 6252 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #pragma once
18
19 #include <thrust/functional.h>
20 #include <cusp/detail/functional.h>
21
22 #ifndef DIA_CHUNKSIZE
23 #define DIA_CHUNKSIZE 1024
24 #endif
25
26 namespace cusp
27 {
28 namespace detail
29 {
30 namespace host
31 {
32
33 /////////////////////////
34 // DIA transposed SpMV //
35 /////////////////////////
36 template <typename Matrix,
37 typename Vector1,
38 typename Vector2,
39 typename UnaryFunction,
40 typename BinaryFunction1,
41 typename BinaryFunction2>
42 void transposed_spmv_dia(const Matrix& A,
43 const Vector1& x,
44 Vector2& y,
45 UnaryFunction initialize,
46 BinaryFunction1 combine,
47 BinaryFunction2 reduce)
48 {
49 typedef typename Matrix::index_type IndexType;
50 //typedef typename Vector2::value_type ValueType;
51
52 const size_t num_diagonals = A.values.num_cols;
53
54 #pragma omp parallel for
55 for (size_t ch = 0; ch < A.num_cols; ch += DIA_CHUNKSIZE) {
56 // initialize chunk
57 for (size_t row = ch; row < std::min(ch+DIA_CHUNKSIZE,A.num_cols); row++)
58 {
59 y[row] = initialize(y[row]);
60 }
61 // for each diagonal
62 for (size_t d = 0; d < num_diagonals; d++)
63 {
64 for (IndexType row=ch; row<std::min(ch+DIA_CHUNKSIZE,A.num_cols); row++)
65 {
66 const IndexType col = row - A.diagonal_offsets[d];
67 if (col >= 0 && col < A.num_rows)
68 {
69 y[row] = reduce(y[row], combine(A.values(col, d), x[col]));
70 }
71 }
72 }
73 }
74 }
75
76 template <typename Matrix,
77 typename Vector1,
78 typename Vector2>
79 void transposed_spmv_dia(const Matrix& A,
80 const Vector1& x,
81 Vector2& y)
82 {
83 typedef typename Vector2::value_type ValueType;
84
85 transposed_spmv_dia(A, x, y,
86 cusp::detail::zero_function<ValueType>(),
87 thrust::multiplies<ValueType>(),
88 thrust::plus<ValueType>());
89 }
90
91 template <typename Matrix,
92 typename Vector1,
93 typename Vector2,
94 typename UnaryFunction,
95 typename BinaryFunction1,
96 typename BinaryFunction2>
97 void transposed_spmv_cds(const Matrix& A,
98 const Vector1& x,
99 Vector2& y,
100 UnaryFunction initialize,
101 BinaryFunction1 combine,
102 BinaryFunction2 reduce)
103 {
104 typedef typename Matrix::index_type IndexType;
105 typedef typename Vector2::value_type ValueType;
106
107 const IndexType num_diagonals = A.diagonal_offsets.size();
108 const IndexType block_size = (IndexType)A.block_size;
109 const IndexType num_cols = (IndexType)A.num_cols;
110 // make chunksize a multiple of block_size
111 const IndexType chunksize = block_size*(DIA_CHUNKSIZE/block_size);
112
113 // optimisation for special case
114 if (block_size == 2) {
115 #pragma omp parallel for
116 for (IndexType ch = 0; ch < num_cols; ch += chunksize) {
117 for (IndexType row = ch; row < std::min(ch+chunksize,num_cols); row+=2)
118 {
119 ValueType sum1 = initialize(y[row]);
120 ValueType sum2 = initialize(y[row+1]);
121 // for each diagonal block
122 for (IndexType d = 0; d < num_diagonals; d++)
123 {
124 const IndexType col = row - A.diagonal_offsets[d]*2;
125 if (col >= 0 && col < A.num_rows)
126 {
127 sum1 = reduce(sum1, combine(A.values(col, 2*d), x[col]));
128 sum2 = reduce(sum2, combine(A.values(col, 2*d+1),x[col]));
129 sum1 = reduce(sum1, combine(A.values(col+1, 2*d), x[col+1]));
130 sum2 = reduce(sum2, combine(A.values(col+1, 2*d+1),x[col+1]));
131 }
132 }
133 y[row] = sum1;
134 y[row+1] = sum2;
135 }
136 }
137 } else {
138 #pragma omp parallel for
139 for (IndexType ch = 0; ch < num_cols; ch += chunksize) {
140 for (IndexType row = ch; row < std::min(ch+chunksize,num_cols); row++)
141 {
142 y[row] = initialize(y[row]);
143 }
144
145 // for each diagonal block
146 for (IndexType d = 0; d < num_diagonals; d++)
147 {
148 const IndexType k = A.diagonal_offsets[d]*block_size;
149 for (IndexType row=ch; row<std::min(ch+chunksize,num_cols); row+=block_size)
150 {
151 const IndexType col = row - k;
152 if (col >= 0 && col < A.num_rows-block_size)
153 {
154 //y[row] = reduce(y[row], combine(A.values(col, d), x[col]));
155 }
156 }
157 }
158 }
159 }
160 }
161
162 template <typename Matrix,
163 typename Vector1,
164 typename Vector2>
165 void transposed_spmv_cds(const Matrix& A,
166 const Vector1& x,
167 Vector2& y)
168 {
169 typedef typename Vector2::value_type ValueType;
170
171 if (A.block_size == 1) {
172 transposed_spmv_dia(A, x, y,
173 cusp::detail::zero_function<ValueType>(),
174 thrust::multiplies<ValueType>(),
175 thrust::plus<ValueType>());
176 } else {
177 transposed_spmv_cds(A, x, y,
178 cusp::detail::zero_function<ValueType>(),
179 thrust::multiplies<ValueType>(),
180 thrust::plus<ValueType>());
181 }
182 }
183
184 } // end namespace host
185 } // end namespace detail
186 } // end namespace cusp
187
188

  ViewVC Help
Powered by ViewVC 1.1.26