/[escript]/branches/diaplayground/cusplibrary/cusp/detail/host/conversion.h
ViewVC logotype

Contents of /branches/diaplayground/cusplibrary/cusp/detail/host/conversion.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4956 - (show annotations)
Tue May 20 12:34:41 2014 UTC (5 years ago) by caltinay
File MIME type: text/plain
File size: 18660 byte(s)
minor changes to cusp to allow compilation with -pedantic and -Werror.
One bug fix in spmv_dia.

1 /*
2 * Copyright 2008-2009 NVIDIA Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18
19 #pragma once
20
21 #include <cusp/ell_matrix.h>
22 #include <cusp/exception.h>
23
24 #include <cusp/detail/host/conversion_utils.h>
25
26 #include <thrust/fill.h>
27 #include <thrust/extrema.h>
28 #include <thrust/count.h>
29
30 namespace cusp
31 {
32 namespace detail
33 {
34 namespace host
35 {
36
37 /////////////////////
38 // COO Conversions //
39 /////////////////////
40
41 template <typename Matrix1, typename Matrix2>
42 void coo_to_csr(const Matrix1& src, Matrix2& dst)
43 {
44 typedef typename Matrix2::index_type IndexType;
45 //typedef typename Matrix2::value_type ValueType;
46
47 dst.resize(src.num_rows, src.num_cols, src.num_entries);
48
49 // compute number of non-zero entries per row of A
50 thrust::fill(dst.row_offsets.begin(), dst.row_offsets.end(), IndexType(0));
51
52 for (size_t n = 0; n < src.num_entries; n++)
53 dst.row_offsets[src.row_indices[n]]++;
54
55 // cumsum the num_entries per row to get dst.row_offsets[]
56 IndexType cumsum = 0;
57 for(size_t i = 0; i < src.num_rows; i++)
58 {
59 IndexType temp = dst.row_offsets[i];
60 dst.row_offsets[i] = cumsum;
61 cumsum += temp;
62 }
63 dst.row_offsets[src.num_rows] = cumsum;
64
65 // write Aj,Ax into dst.column_indices,dst.values
66 for(size_t n = 0; n < src.num_entries; n++)
67 {
68 IndexType row = src.row_indices[n];
69 IndexType dest = dst.row_offsets[row];
70
71 dst.column_indices[dest] = src.column_indices[n];
72 dst.values[dest] = src.values[n];
73
74 dst.row_offsets[row]++;
75 }
76
77 IndexType last = 0;
78 for(size_t i = 0; i <= src.num_rows; i++)
79 {
80 IndexType temp = dst.row_offsets[i];
81 dst.row_offsets[i] = last;
82 last = temp;
83 }
84
85 //csr may contain duplicates
86 }
87
88 template <typename Matrix1, typename Matrix2>
89 void coo_to_array2d(const Matrix1& src, Matrix2& dst)
90 {
91 //typedef typename Matrix2::index_type IndexType;
92 typedef typename Matrix2::value_type ValueType;
93
94 dst.resize(src.num_rows, src.num_cols);
95
96 thrust::fill(dst.values.begin(), dst.values.end(), ValueType(0));
97
98 for(size_t n = 0; n < src.num_entries; n++)
99 dst(src.row_indices[n], src.column_indices[n]) += src.values[n]; //sum duplicates
100 }
101
102 /////////////////////
103 // CSR Conversions //
104 /////////////////////
105
106 template <typename Matrix1, typename Matrix2>
107 void csr_to_coo(const Matrix1& src, Matrix2& dst)
108 {
109 typedef typename Matrix2::index_type IndexType;
110 //typedef typename Matrix2::value_type ValueType;
111
112 dst.resize(src.num_rows, src.num_cols, src.num_entries);
113
114 // TODO replace with offsets_to_indices
115 for(size_t i = 0; i < src.num_rows; i++)
116 for(IndexType jj = src.row_offsets[i]; jj < src.row_offsets[i + 1]; jj++)
117 dst.row_indices[jj] = i;
118
119 cusp::copy(src.column_indices, dst.column_indices);
120 cusp::copy(src.values, dst.values);
121 }
122
123 template <typename Matrix1, typename Matrix2>
124 void csr_to_dia(const Matrix1& src, Matrix2& dst,
125 const size_t alignment = 32)
126 {
127 typedef typename Matrix2::index_type IndexType;
128 typedef typename Matrix2::value_type ValueType;
129
130 // compute number of occupied diagonals and enumerate them
131 size_t num_diagonals = 0;
132
133 cusp::array1d<IndexType,cusp::host_memory> diag_map(src.num_rows + src.num_cols, 0);
134
135 for(size_t i = 0; i < src.num_rows; i++)
136 {
137 for(IndexType jj = src.row_offsets[i]; jj < src.row_offsets[i+1]; jj++)
138 {
139 size_t j = src.column_indices[jj];
140 size_t map_index = (src.num_rows - i) + j; //offset shifted by + num_rows
141
142 if(diag_map[map_index] == 0)
143 {
144 diag_map[map_index] = 1;
145 num_diagonals++;
146 }
147 }
148 }
149
150
151 // allocate DIA structure
152 dst.resize(src.num_rows, src.num_cols, src.num_entries, num_diagonals, alignment);
153
154 // fill in diagonal_offsets array
155 for(size_t n = 0, diag = 0; n < src.num_rows + src.num_cols; n++)
156 {
157 if(diag_map[n] == 1)
158 {
159 diag_map[n] = diag;
160 dst.diagonal_offsets[diag] = (IndexType) n - (IndexType) src.num_rows;
161 diag++;
162 }
163 }
164
165 // fill in values array
166 thrust::fill(dst.values.values.begin(), dst.values.values.end(), ValueType(0));
167
168 for(size_t i = 0; i < src.num_rows; i++)
169 {
170 for(IndexType jj = src.row_offsets[i]; jj < src.row_offsets[i+1]; jj++)
171 {
172 size_t j = src.column_indices[jj];
173 size_t map_index = (src.num_rows - i) + j; //offset shifted by + num_rows
174 size_t diag = diag_map[map_index];
175
176 dst.values(i, diag) = src.values[jj];
177 }
178 }
179 }
180
181
182 template <typename Matrix1, typename Matrix2>
183 void csr_to_hyb(const Matrix1& src, Matrix2& dst,
184 const size_t num_entries_per_row,
185 const size_t alignment = 32)
186 {
187 typedef typename Matrix2::index_type IndexType;
188 typedef typename Matrix2::value_type ValueType;
189
190 // The ELL portion of the HYB matrix will have 'num_entries_per_row' columns.
191 // Nonzero values that do not fit within the ELL structure are placed in the
192 // COO format portion of the HYB matrix.
193
194 // compute number of nonzeros in the ELL and COO portions
195 size_t num_ell_entries = 0;
196 for(size_t i = 0; i < src.num_rows; i++)
197 num_ell_entries += thrust::min<size_t>(num_entries_per_row, src.row_offsets[i+1] - src.row_offsets[i]);
198
199 IndexType num_coo_entries = src.num_entries - num_ell_entries;
200
201 dst.resize(src.num_rows, src.num_cols,
202 num_ell_entries, num_coo_entries,
203 num_entries_per_row, alignment);
204
205 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
206
207 // pad out ELL format with zeros
208 thrust::fill(dst.ell.column_indices.values.begin(), dst.ell.column_indices.values.end(), invalid_index);
209 thrust::fill(dst.ell.values.values.begin(), dst.ell.values.values.end(), ValueType(0));
210
211 for(size_t i = 0, coo_nnz = 0; i < src.num_rows; i++)
212 {
213 size_t n = 0;
214 IndexType jj = src.row_offsets[i];
215
216 // copy up to num_cols_per_row values of row i into the ELL
217 while(jj < src.row_offsets[i+1] && n < num_entries_per_row)
218 {
219 dst.ell.column_indices(i,n) = src.column_indices[jj];
220 dst.ell.values(i,n) = src.values[jj];
221 jj++, n++;
222 }
223
224 // copy any remaining values in row i into the COO
225 while(jj < src.row_offsets[i+1])
226 {
227 dst.coo.row_indices[coo_nnz] = i;
228 dst.coo.column_indices[coo_nnz] = src.column_indices[jj];
229 dst.coo.values[coo_nnz] = src.values[jj];
230 jj++; coo_nnz++;
231 }
232 }
233 }
234
235
236 template <typename Matrix1, typename Matrix2>
237 void csr_to_ell(const Matrix1& src, Matrix2& dst,
238 const size_t num_entries_per_row, const size_t alignment = 32)
239 {
240 typedef typename Matrix2::index_type IndexType;
241 typedef typename Matrix2::value_type ValueType;
242
243 // compute number of nonzeros
244 size_t num_entries = 0;
245 #pragma omp parallel for reduction( +: num_entries)
246 for(size_t i = 0; i < src.num_rows; i++)
247 num_entries += thrust::min<size_t>(num_entries_per_row, src.row_offsets[i+1] - src.row_offsets[i]);
248
249 dst.resize(src.num_rows, src.num_cols, num_entries, num_entries_per_row, alignment);
250
251 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
252
253 // pad out ELL format with zeros
254 thrust::fill(dst.column_indices.values.begin(), dst.column_indices.values.end(), invalid_index);
255 thrust::fill(dst.values.values.begin(), dst.values.values.end(), ValueType(0));
256
257 #pragma omp parallel for
258 for(size_t i = 0; i < src.num_rows; i++)
259 {
260 size_t n = 0;
261 IndexType jj = src.row_offsets[i];
262
263 // copy up to num_cols_per_row values of row i into the ELL
264 while(jj < src.row_offsets[i+1] && n < num_entries_per_row)
265 {
266 dst.column_indices(i,n) = src.column_indices[jj];
267 dst.values(i,n) = src.values[jj];
268 jj++, n++;
269 }
270 }
271 }
272
273
274 template <typename Matrix1, typename Matrix2>
275 void csr_to_array2d(const Matrix1& src, Matrix2& dst)
276 {
277 typedef typename Matrix2::index_type IndexType;
278 typedef typename Matrix2::value_type ValueType;
279
280 dst.resize(src.num_rows, src.num_cols);
281
282 thrust::fill(dst.values.begin(), dst.values.end(), ValueType(0));
283
284 for(size_t i = 0; i < src.num_rows; i++)
285 for(IndexType jj = src.row_offsets[i]; jj < src.row_offsets[i+1]; jj++)
286 dst(i, src.column_indices[jj]) += src.values[jj]; //sum duplicates
287 }
288
289
290 /////////////////////
291 // DIA Conversions //
292 /////////////////////
293
294 template <typename Matrix1, typename Matrix2>
295 void dia_to_csr(const Matrix1& src, Matrix2& dst)
296 {
297 typedef typename Matrix2::index_type IndexType;
298 typedef typename Matrix2::value_type ValueType;
299
300 size_t num_entries = 0;
301 size_t num_diagonals = src.diagonal_offsets.size();
302
303 // count nonzero entries
304 for(size_t i = 0; i < src.num_rows; i++)
305 {
306 for(size_t n = 0; n < num_diagonals; n++)
307 {
308 const IndexType j = i + src.diagonal_offsets[n];
309
310 if(j >= 0 && static_cast<size_t>(j) < src.num_cols && src.values(i,n) != ValueType(0))
311 num_entries++;
312 }
313 }
314
315 dst.resize(src.num_rows, src.num_cols, num_entries);
316
317 num_entries = 0;
318 dst.row_offsets[0] = 0;
319
320 // copy nonzero entries to CSR structure
321 for(size_t i = 0; i < src.num_rows; i++)
322 {
323 for(size_t n = 0; n < num_diagonals; n++)
324 {
325 const IndexType j = i + src.diagonal_offsets[n];
326
327 if(j >= 0 && static_cast<size_t>(j) < src.num_cols)
328 {
329 const ValueType value = src.values(i, n);
330
331 if (value != ValueType(0))
332 {
333 dst.column_indices[num_entries] = j;
334 dst.values[num_entries] = value;
335 num_entries++;
336 }
337 }
338 }
339
340 dst.row_offsets[i + 1] = num_entries;
341 }
342 }
343
344 /////////////////////
345 // ELL Conversions //
346 /////////////////////
347
348 template <typename Matrix1, typename Matrix2>
349 void ell_to_coo(const Matrix1& src, Matrix2& dst)
350 {
351 typedef typename Matrix2::index_type IndexType;
352 typedef typename Matrix2::value_type ValueType;
353
354 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
355
356 dst.resize(src.num_rows, src.num_cols, src.num_entries);
357
358 size_t num_entries = 0;
359
360 const size_t num_entries_per_row = src.column_indices.num_cols;
361
362 for(size_t i = 0; i < src.num_rows; i++)
363 {
364 for(size_t n = 0; n < num_entries_per_row; n++)
365 {
366 const IndexType j = src.column_indices(i,n);
367 const ValueType v = src.values(i,n);
368
369 if(j != invalid_index)
370 {
371 dst.row_indices[num_entries] = i;
372 dst.column_indices[num_entries] = j;
373 dst.values[num_entries] = v;
374 num_entries++;
375 }
376 }
377 }
378 }
379
380 template <typename Matrix1, typename Matrix2>
381 void ell_to_csr(const Matrix1& src, Matrix2& dst)
382 {
383 typedef typename Matrix2::index_type IndexType;
384 typedef typename Matrix2::value_type ValueType;
385
386 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
387
388 dst.resize(src.num_rows, src.num_cols, src.num_entries);
389
390 size_t num_entries = 0;
391 dst.row_offsets[0] = 0;
392
393 const size_t num_entries_per_row = src.column_indices.num_cols;
394
395 for(size_t i = 0; i < src.num_rows; i++)
396 {
397 for(size_t n = 0; n < num_entries_per_row; n++)
398 {
399 const IndexType j = src.column_indices(i,n);
400 const ValueType v = src.values(i,n);
401
402 if(j != invalid_index)
403 {
404 dst.column_indices[num_entries] = j;
405 dst.values[num_entries] = v;
406 num_entries++;
407 }
408 }
409
410 dst.row_offsets[i + 1] = num_entries;
411 }
412 }
413
414 /////////////////////
415 // HYB Conversions //
416 /////////////////////
417
418 template <typename Matrix1, typename Matrix2>
419 void hyb_to_coo(const Matrix1& src, Matrix2& dst)
420 {
421 typedef typename Matrix2::index_type IndexType;
422 typedef typename Matrix2::value_type ValueType;
423
424 dst.resize(src.num_rows, src.num_cols, src.num_entries);
425
426 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
427
428 const size_t num_entries_per_row = src.ell.column_indices.num_cols;
429
430 size_t num_entries = 0;
431 size_t coo_progress = 0;
432
433 // merge each row of the ELL and COO parts into a single COO row
434 for(size_t i = 0; i < src.num_rows; i++)
435 {
436 // append the i-th row from the ELL part
437 for(size_t n = 0; n < num_entries_per_row; n++)
438 {
439 const IndexType j = src.ell.column_indices(i,n);
440 const ValueType v = src.ell.values(i,n);
441
442 if(j != invalid_index)
443 {
444 dst.row_indices[num_entries] = i;
445 dst.column_indices[num_entries] = j;
446 dst.values[num_entries] = v;
447 num_entries++;
448 }
449 }
450
451 // append the i-th row from the COO part
452 while (coo_progress < src.coo.num_entries && static_cast<size_t>(src.coo.row_indices[coo_progress]) == i)
453 {
454 dst.row_indices[num_entries] = i;
455 dst.column_indices[num_entries] = src.coo.column_indices[coo_progress];
456 dst.values[num_entries] = src.coo.values[coo_progress];
457 num_entries++;
458 coo_progress++;
459 }
460 }
461 }
462
463 template <typename Matrix1, typename Matrix2>
464 void hyb_to_csr(const Matrix1& src, Matrix2& dst)
465 {
466 typedef typename Matrix2::index_type IndexType;
467 typedef typename Matrix2::value_type ValueType;
468
469 dst.resize(src.num_rows, src.num_cols, src.num_entries);
470
471 const IndexType invalid_index = cusp::ell_matrix<IndexType, ValueType, cusp::host_memory>::invalid_index;
472
473 const size_t num_entries_per_row = src.ell.column_indices.num_cols;
474
475 size_t num_entries = 0;
476 dst.row_offsets[0] = 0;
477
478 size_t coo_progress = 0;
479
480 // merge each row of the ELL and COO parts into a single CSR row
481 for(size_t i = 0; i < src.num_rows; i++)
482 {
483 // append the i-th row from the ELL part
484 for(size_t n = 0; n < num_entries_per_row; n++)
485 {
486 const IndexType j = src.ell.column_indices(i,n);
487 const ValueType v = src.ell.values(i,n);
488
489 if(j != invalid_index)
490 {
491 dst.column_indices[num_entries] = j;
492 dst.values[num_entries] = v;
493 num_entries++;
494 }
495 }
496
497 // append the i-th row from the COO part
498 while (coo_progress < src.coo.num_entries && static_cast<size_t>(src.coo.row_indices[coo_progress]) == i)
499 {
500 dst.column_indices[num_entries] = src.coo.column_indices[coo_progress];
501 dst.values[num_entries] = src.coo.values[coo_progress];
502 num_entries++;
503 coo_progress++;
504 }
505
506 dst.row_offsets[i + 1] = num_entries;
507 }
508 }
509
510
511 /////////////////////////
512 // Array1d Conversions //
513 /////////////////////////
514 template <typename Matrix1, typename Matrix2>
515 void array2d_to_array1d(const Matrix1& src, Matrix2& dst)
516 {
517 if (src.num_rows == 0 && src.num_cols == 0)
518 {
519 dst.resize(0);
520 }
521 else if (src.num_cols == 1)
522 {
523 dst.resize(src.num_rows);
524
525 for (size_t i = 0; i < src.num_rows; i++)
526 dst[i] = src(i,0);
527 }
528 else if (src.num_rows == 1)
529 {
530 dst.resize(src.num_cols);
531
532 for (size_t j = 0; j < src.num_cols; j++)
533 dst[j] = src(0,j);
534 }
535 else
536 {
537 throw cusp::format_conversion_exception("array2d to array1d conversion is only defined for row or column vectors");
538 }
539 }
540
541 /////////////////////////
542 // Array2d Conversions //
543 /////////////////////////
544 template <typename Matrix1, typename Matrix2>
545 void array1d_to_array2d(const Matrix1& src, Matrix2& dst)
546 {
547 dst.resize(src.size(),1);
548
549 for (size_t i = 0; i < src.size(); i++)
550 dst(i,0) = src[i];
551 }
552
553 template <typename Matrix1, typename Matrix2>
554 void array2d_to_coo(const Matrix1& src, Matrix2& dst)
555 {
556 //typedef typename Matrix2::index_type IndexType;
557 typedef typename Matrix2::value_type ValueType;
558
559 // count number of nonzero entries in array
560 size_t nnz = 0;
561
562 for(size_t i = 0; i < src.num_rows; i++)
563 {
564 for(size_t j = 0; j < src.num_cols; j++)
565 {
566 if (src(i,j) != ValueType(0))
567 nnz++;
568 }
569 }
570
571 dst.resize(src.num_rows, src.num_cols, nnz);
572
573 nnz = 0;
574
575 for(size_t i = 0; i < src.num_rows; i++)
576 {
577 for(size_t j = 0; j < src.num_cols; j++)
578 {
579 if (src(i,j) != ValueType(0))
580 {
581 dst.row_indices[nnz] = i;
582 dst.column_indices[nnz] = j;
583 dst.values[nnz] = src(i,j);
584 nnz++;
585 }
586 }
587 }
588 }
589
590 template <typename Matrix1, typename Matrix2>
591 void array2d_to_csr(const Matrix1& src, Matrix2& dst)
592 {
593 typedef typename Matrix2::index_type IndexType;
594 typedef typename Matrix2::value_type ValueType;
595
596 IndexType nnz = src.num_entries - thrust::count(src.values.begin(), src.values.end(), ValueType(0));
597
598 dst.resize(src.num_rows, src.num_cols, nnz);
599
600 IndexType num_entries = 0;
601
602 for(size_t i = 0; i < src.num_rows; i++)
603 {
604 dst.row_offsets[i] = num_entries;
605
606 for(size_t j = 0; j < src.num_cols; j++)
607 {
608 if (src(i,j) != ValueType(0))
609 {
610 dst.column_indices[num_entries] = j;
611 dst.values[num_entries] = src(i,j);
612 num_entries++;
613 }
614 }
615 }
616
617 dst.row_offsets[src.num_rows] = num_entries;
618 }
619
620 } // end namespace host
621 } // end namespace detail
622 } // end namespace cusp
623

  ViewVC Help
Powered by ViewVC 1.1.26