1 |
caltinay |
4955 |
#pragma once |
2 |
|
|
|
3 |
|
|
#include <string> |
4 |
|
|
#include <iostream> |
5 |
|
|
#include <stdio.h> |
6 |
|
|
|
7 |
|
|
#include "bytes_per_spmv.h" |
8 |
|
|
|
9 |
|
|
#include "../timer.h" |
10 |
|
|
#include <cusp/detail/device/spmv/coo_flat_k.h> |
11 |
|
|
|
12 |
|
|
// Name of the log file that test_spmv() opens in append mode to record results.
const char* BENCHMARK_OUTPUT_FILE_NAME = "benchmark_output.log";
13 |
|
|
|
14 |
|
|
template <typename HostMatrix, typename TestMatrix, typename TestKernel> |
15 |
|
|
float check_spmv(HostMatrix& host_matrix, TestMatrix& test_matrix, TestKernel test_kernel) |
16 |
|
|
{ |
17 |
|
|
typedef typename TestMatrix::index_type IndexType; // ASSUME same as HostMatrix::index_type |
18 |
|
|
typedef typename TestMatrix::value_type ValueType; // ASSUME same as HostMatrix::value_type |
19 |
|
|
typedef typename TestMatrix::memory_space MemorySpace; |
20 |
|
|
|
21 |
|
|
const IndexType M = host_matrix.num_rows; |
22 |
|
|
const IndexType N = host_matrix.num_cols; |
23 |
|
|
|
24 |
|
|
// create host input (x) and output (y) vectors |
25 |
|
|
cusp::array1d<ValueType,cusp::host_memory> host_x(N); |
26 |
|
|
cusp::array1d<ValueType,cusp::host_memory> host_y(M); |
27 |
|
|
//for(IndexType i = 0; i < N; i++) host_x[i] = (rand() % 21) - 10; |
28 |
|
|
for(IndexType i = 0; i < N; i++) host_x[i] = (int(i % 21) - 10); |
29 |
|
|
for(IndexType i = 0; i < M; i++) host_y[i] = 0; |
30 |
|
|
|
31 |
|
|
// create test input (x) and output (y) vectors |
32 |
|
|
cusp::array1d<ValueType, MemorySpace> test_x(host_x.begin(), host_x.end()); |
33 |
|
|
cusp::array1d<ValueType, MemorySpace> test_y(host_y.begin(), host_y.end()); |
34 |
|
|
|
35 |
|
|
// compute SpMV on host and device |
36 |
|
|
cusp::multiply(host_matrix, host_x, host_y); |
37 |
|
|
test_kernel(test_matrix, thrust::raw_pointer_cast(&test_x[0]), thrust::raw_pointer_cast(&test_y[0])); |
38 |
|
|
|
39 |
|
|
// compare results |
40 |
|
|
cusp::array1d<ValueType,cusp::host_memory> test_y_copy(test_y.begin(), test_y.end()); |
41 |
|
|
double error = l2_error(M, thrust::raw_pointer_cast(&test_y_copy[0]), thrust::raw_pointer_cast(&host_y[0])); |
42 |
|
|
|
43 |
|
|
// if (error > 0.0001) |
44 |
|
|
// { |
45 |
|
|
// for(int i = 0; i < std::min<int>(N,256); i++) |
46 |
|
|
// printf("host_x[%5d] = %10.8f\n", i, (float) host_x[i]); |
47 |
|
|
// |
48 |
|
|
// int limit = 256; |
49 |
|
|
// for(int i = 0; i < M; i++) |
50 |
|
|
// { |
51 |
|
|
// if(std::abs(host_y[i] - test_y[i]) > 0.0) |
52 |
|
|
// { |
53 |
|
|
// printf("host_y[%5d] = %10.8f test_y[%5d] = %10.8f\n", i, (float) host_y[i], i, (float) test_y[i]); |
54 |
|
|
// |
55 |
|
|
// if(--limit <= 0) |
56 |
|
|
// break; |
57 |
|
|
// } |
58 |
|
|
// } |
59 |
|
|
// } |
60 |
|
|
|
61 |
|
|
return error; |
62 |
|
|
} |
63 |
|
|
|
64 |
|
|
|
65 |
|
|
template <typename TestMatrix, typename TestKernel> |
66 |
|
|
float time_spmv(TestMatrix& test_matrix, TestKernel test_spmv, double seconds = 3.0, size_t min_iterations = 100, size_t max_iterations = 500) |
67 |
|
|
{ |
68 |
|
|
typedef typename TestMatrix::index_type IndexType; // ASSUME same as HostMatrix::index_type |
69 |
|
|
typedef typename TestMatrix::value_type ValueType; // ASSUME same as HostMatrix::value_type |
70 |
|
|
typedef typename TestMatrix::memory_space MemorySpace; |
71 |
|
|
|
72 |
|
|
const IndexType M = test_matrix.num_rows; |
73 |
|
|
const IndexType N = test_matrix.num_cols; |
74 |
|
|
|
75 |
|
|
// create test input (x) and output (y) vectors |
76 |
|
|
cusp::array1d<ValueType, MemorySpace> test_x(N); |
77 |
|
|
cusp::array1d<ValueType, MemorySpace> test_y(M); |
78 |
|
|
|
79 |
|
|
// warmup |
80 |
|
|
timer time_one_iteration; |
81 |
|
|
test_spmv(test_matrix, thrust::raw_pointer_cast(&test_x[0]), thrust::raw_pointer_cast(&test_y[0])); |
82 |
|
|
cudaThreadSynchronize(); |
83 |
|
|
double estimated_time = time_one_iteration.seconds_elapsed(); |
84 |
|
|
|
85 |
|
|
// determine # of seconds dynamically |
86 |
|
|
size_t num_iterations; |
87 |
|
|
if (estimated_time == 0) |
88 |
|
|
num_iterations = max_iterations; |
89 |
|
|
else |
90 |
|
|
num_iterations = std::min(max_iterations, std::max(min_iterations, (size_t) (seconds / estimated_time)) ); |
91 |
|
|
|
92 |
|
|
// time several SpMV iterations |
93 |
|
|
timer t; |
94 |
|
|
for(size_t i = 0; i < num_iterations; i++) |
95 |
|
|
test_spmv(test_matrix, thrust::raw_pointer_cast(&test_x[0]), thrust::raw_pointer_cast(&test_y[0])); |
96 |
|
|
cudaThreadSynchronize(); |
97 |
|
|
|
98 |
|
|
float sec_per_iteration = t.seconds_elapsed() / num_iterations; |
99 |
|
|
|
100 |
|
|
return sec_per_iteration; |
101 |
|
|
} |
102 |
|
|
|
103 |
|
|
|
104 |
|
|
template <typename HostMatrix, typename TestMatrixOnHost, typename TestMatrixOnDevice, typename TestKernel> |
105 |
|
|
void test_spmv(std::string kernel_name, |
106 |
|
|
HostMatrix& host_matrix, |
107 |
|
|
TestMatrixOnHost& test_matrix_on_host, |
108 |
|
|
TestMatrixOnDevice& test_matrix_on_device, |
109 |
|
|
TestKernel test_spmv) |
110 |
|
|
{ |
111 |
|
|
float error = check_spmv(host_matrix, test_matrix_on_device, test_spmv); |
112 |
|
|
float time = time_spmv( test_matrix_on_device, test_spmv); |
113 |
|
|
float gbyte = bytes_per_spmv(test_matrix_on_host); |
114 |
|
|
|
115 |
|
|
float GFLOPs = (time == 0) ? 0 : (2 * host_matrix.num_entries / time) / 1e9; |
116 |
|
|
float GBYTEs = (time == 0) ? 0 : (gbyte / time) / 1e9; |
117 |
|
|
|
118 |
|
|
printf("\t%-20s: %8.4f ms ( %5.2f GFLOP/s %5.1f GB/s) [L2 error %f]\n", kernel_name.c_str(), 1e3 * time, GFLOPs, GBYTEs, error); |
119 |
|
|
|
120 |
|
|
//record results to file |
121 |
|
|
FILE * fid = fopen(BENCHMARK_OUTPUT_FILE_NAME, "a"); |
122 |
|
|
fprintf(fid, "kernel=%s gflops=%f gbytes=%f msec=%f\n", kernel_name.c_str(), GFLOPs, GBYTEs, 1e3 * time); |
123 |
|
|
fclose(fid); |
124 |
|
|
} |
125 |
|
|
|
126 |
|
|
|
127 |
|
|
///////////////////////////////////////////////////// |
128 |
|
|
// These methods test specific formats and kernels // |
129 |
|
|
///////////////////////////////////////////////////// |
130 |
|
|
|
131 |
|
|
template <typename HostMatrix> |
132 |
|
|
void test_coo(HostMatrix& host_matrix) |
133 |
|
|
{ |
134 |
|
|
typedef typename HostMatrix::index_type IndexType; |
135 |
|
|
typedef typename HostMatrix::value_type ValueType; |
136 |
|
|
|
137 |
|
|
// convert HostMatrix to TestMatrix on host |
138 |
|
|
cusp::coo_matrix<IndexType, ValueType, cusp::host_memory> test_matrix_on_host(host_matrix); |
139 |
|
|
|
140 |
|
|
// transfer TestMatrix to device |
141 |
|
|
typedef typename cusp::coo_matrix<IndexType, ValueType, cusp::device_memory> DeviceMatrix; |
142 |
|
|
DeviceMatrix test_matrix_on_device(test_matrix_on_host); |
143 |
|
|
|
144 |
|
|
test_spmv("coo_flat", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_coo_flat <DeviceMatrix,ValueType>); |
145 |
|
|
test_spmv("coo_flat_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_coo_flat_tex<DeviceMatrix,ValueType>); |
146 |
|
|
|
147 |
|
|
// test_spmv("coo_flat_k", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_coo_flat_k <DeviceMatrix,ValueType>); |
148 |
|
|
// test_spmv("coo_flat_k_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_coo_flat_k_tex<DeviceMatrix,ValueType>); |
149 |
|
|
} |
150 |
|
|
|
151 |
|
|
template <typename HostMatrix> |
152 |
|
|
void test_csr(HostMatrix& host_matrix) |
153 |
|
|
{ |
154 |
|
|
typedef typename HostMatrix::index_type IndexType; |
155 |
|
|
typedef typename HostMatrix::value_type ValueType; |
156 |
|
|
|
157 |
|
|
// convert HostMatrix to TestMatrix on host |
158 |
|
|
cusp::csr_matrix<IndexType, ValueType, cusp::host_memory> test_matrix_on_host(host_matrix); |
159 |
|
|
|
160 |
|
|
// transfer csr_matrix to device |
161 |
|
|
typedef typename cusp::csr_matrix<IndexType, ValueType, cusp::device_memory> DeviceMatrix; |
162 |
|
|
DeviceMatrix test_matrix_on_device(test_matrix_on_host); |
163 |
|
|
|
164 |
|
|
test_spmv("csr_scalar", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_csr_scalar <DeviceMatrix,ValueType>); |
165 |
|
|
test_spmv("csr_scalar_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_csr_scalar_tex<DeviceMatrix,ValueType>); |
166 |
|
|
test_spmv("csr_vector", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_csr_vector <DeviceMatrix,ValueType>); |
167 |
|
|
test_spmv("csr_vector_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_csr_vector_tex<DeviceMatrix,ValueType>); |
168 |
|
|
} |
169 |
|
|
|
170 |
|
|
template <typename HostMatrix> |
171 |
|
|
void test_dia(HostMatrix& host_matrix) |
172 |
|
|
{ |
173 |
|
|
typedef typename HostMatrix::index_type IndexType; |
174 |
|
|
typedef typename HostMatrix::value_type ValueType; |
175 |
|
|
|
176 |
|
|
// convert HostMatrix to TestMatrix on host |
177 |
|
|
cusp::dia_matrix<IndexType, ValueType, cusp::host_memory> test_matrix_on_host; |
178 |
|
|
|
179 |
|
|
try |
180 |
|
|
{ |
181 |
|
|
test_matrix_on_host = host_matrix; |
182 |
|
|
} |
183 |
|
|
catch (cusp::format_conversion_exception) |
184 |
|
|
{ |
185 |
|
|
std::cout << "\tRefusing to convert to DIA format" << std::endl; |
186 |
|
|
return; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
// transfer TestMatrix to device |
190 |
|
|
typedef typename cusp::dia_matrix<IndexType, ValueType, cusp::device_memory> DeviceMatrix; |
191 |
|
|
DeviceMatrix test_matrix_on_device(test_matrix_on_host); |
192 |
|
|
|
193 |
|
|
test_spmv("dia", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_dia <DeviceMatrix,ValueType>); |
194 |
|
|
test_spmv("dia_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_dia_tex<DeviceMatrix,ValueType>); |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
template <typename HostMatrix> |
198 |
|
|
void test_ell(HostMatrix& host_matrix) |
199 |
|
|
{ |
200 |
|
|
typedef typename HostMatrix::index_type IndexType; |
201 |
|
|
typedef typename HostMatrix::value_type ValueType; |
202 |
|
|
|
203 |
|
|
// convert HostMatrix to TestMatrix on host |
204 |
|
|
cusp::ell_matrix<IndexType, ValueType, cusp::host_memory> test_matrix_on_host; |
205 |
|
|
|
206 |
|
|
try |
207 |
|
|
{ |
208 |
|
|
test_matrix_on_host = host_matrix; |
209 |
|
|
} |
210 |
|
|
catch (cusp::format_conversion_exception) |
211 |
|
|
{ |
212 |
|
|
std::cout << "\tRefusing to convert to ELL format" << std::endl; |
213 |
|
|
return; |
214 |
|
|
} |
215 |
|
|
|
216 |
|
|
// transfer TestMatrix to device |
217 |
|
|
typedef typename cusp::ell_matrix<IndexType, ValueType, cusp::device_memory> DeviceMatrix; |
218 |
|
|
DeviceMatrix test_matrix_on_device(test_matrix_on_host); |
219 |
|
|
|
220 |
|
|
test_spmv("ell", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_ell <DeviceMatrix,ValueType>); |
221 |
|
|
test_spmv("ell_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_ell_tex<DeviceMatrix,ValueType>); |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
template <typename HostMatrix> |
225 |
|
|
void test_hyb(HostMatrix& host_matrix) |
226 |
|
|
{ |
227 |
|
|
typedef typename HostMatrix::index_type IndexType; |
228 |
|
|
typedef typename HostMatrix::value_type ValueType; |
229 |
|
|
|
230 |
|
|
// convert HostMatrix to TestMatrix on host |
231 |
|
|
cusp::hyb_matrix<IndexType, ValueType, cusp::host_memory> test_matrix_on_host(host_matrix); |
232 |
|
|
|
233 |
|
|
// transfer TestMatrix to device |
234 |
|
|
typedef typename cusp::hyb_matrix<IndexType, ValueType, cusp::device_memory> DeviceMatrix; |
235 |
|
|
DeviceMatrix test_matrix_on_device(test_matrix_on_host); |
236 |
|
|
|
237 |
|
|
test_spmv("hyb", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_hyb <DeviceMatrix,ValueType>); |
238 |
|
|
test_spmv("hyb_tex", host_matrix, test_matrix_on_host, test_matrix_on_device, cusp::detail::device::spmv_hyb_tex<DeviceMatrix,ValueType>); |
239 |
|
|
} |
240 |
|
|
|