/[escript]/trunk/paso/src/SparseMatrix.cpp
ViewVC logotype

Contents of /trunk/paso/src/SparseMatrix.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File size: 24904 byte(s)
Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: SparseMatrix */
21
22 /****************************************************************************/
23
24 /* Author: Lutz Gross, l.gross@uq.edu.au */
25
26 /****************************************************************************/
27
28 #include "SparseMatrix.h"
29 #include "BlockOps.h"
30 #include "MKL.h"
31 #include "Preconditioner.h"
32 #include "UMFPACK.h"
33 #include "mmio.h"
34
35 #include <boost/scoped_array.hpp>
36
37 /****************************************************************************/
38
39 namespace paso {
40
41 using esysUtils::IndexList;
42
43 /* debug: print the entries */
44 /*
45 void print_entries(index_t *r, index_t *c, double *v, int nz)
46 {
47 for(int i=0; i<nz; i++)
48 printf("(%ld, %ld) == %e\n", (long)r[i], (long)c[i], v[i]);
49 }
50 */
51
52 /* swap function */
53 void swap(index_t *r, index_t *c, double *v, int left, int right)
54 {
55 double v_temp;
56 index_t temp;
57
58 temp = r[left];
59 r[left] = r[right];
60 r[right] = temp;
61
62 temp = c[left];
63 c[left] = c[right];
64 c[right] = temp;
65
66 v_temp = v[left];
67 v[left] = v[right];
68 v[right] = v_temp;
69 }
70
71 void q_sort(index_t *row, index_t *col, double *val, int begin, int end, int N)
72 {
73 int l, r;
74 index_t pivot, lval;
75
76 if (end > begin) {
77 pivot = N * row[begin] + col[begin];
78 l = begin + 1;
79 r = end;
80
81 while (l < r) {
82 lval = N * row[l] + col[l];
83 if (lval < pivot)
84 l++;
85 else {
86 r--;
87 swap( row, col, val, l, r );
88 }
89 }
90 l--;
91 swap(row, col, val, begin, l);
92 q_sort(row, col, val, begin, l, N);
93 q_sort(row, col, val, r, end, N);
94 }
95 }
96
97
98 /* Allocates a SparseMatrix of given type using the given matrix pattern.
99 Values are initialized with zero.
100 If patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the
101 pattern is already unrolled to match the requested block size
102 and offsets. Otherwise unrolling and offset adjustment will be performed.
103 */
104 SparseMatrix::SparseMatrix(SparseMatrixType ntype, Pattern_ptr npattern,
105 int rowBlockSize, int colBlockSize,
106 bool patternIsUnrolled) :
107 type(ntype),
108 val(NULL),
109 solver_package(PASO_PASO),
110 solver_p(NULL)
111 {
112 if (patternIsUnrolled) {
113 if (!XNOR(ntype & MATRIX_FORMAT_OFFSET1, npattern->type & MATRIX_FORMAT_OFFSET1)) {
114 Esys_setError(TYPE_ERROR, "SparseMatrix: requested offset and pattern offset do not match.");
115 }
116 }
117 // do we need to apply unrolling?
118 bool unroll
119 // we don't like non-square blocks
120 = (rowBlockSize != colBlockSize)
121 #ifndef USE_LAPACK
122 // or any block size bigger than 3
123 || (colBlockSize > 3)
124 # endif
125 // or if block size one requested and the block size is not 1
126 || ((ntype & MATRIX_FORMAT_BLK1) && (colBlockSize > 1))
127 // or if offsets don't match
128 || ((ntype & MATRIX_FORMAT_OFFSET1) != (npattern->type & MATRIX_FORMAT_OFFSET1));
129
130 SparseMatrixType pattern_format_out = (ntype & MATRIX_FORMAT_OFFSET1)
131 ? MATRIX_FORMAT_OFFSET1 : MATRIX_FORMAT_DEFAULT;
132
133 // === compressed sparse columns ===
134 if (ntype & MATRIX_FORMAT_CSC) {
135 if (unroll) {
136 if (patternIsUnrolled) {
137 pattern = npattern;
138 } else {
139 pattern = npattern->unrollBlocks(pattern_format_out,
140 colBlockSize, rowBlockSize);
141 }
142 row_block_size = 1;
143 col_block_size = 1;
144 } else {
145 pattern = npattern->unrollBlocks(pattern_format_out, 1, 1);
146 row_block_size = rowBlockSize;
147 col_block_size = colBlockSize;
148 }
149 if (Esys_noError()) {
150 numRows = pattern->numInput;
151 numCols = pattern->numOutput;
152 }
153 } else {
154 // === compressed sparse row ===
155 if (unroll) {
156 if (patternIsUnrolled) {
157 pattern = npattern;
158 } else {
159 pattern = npattern->unrollBlocks(pattern_format_out,
160 rowBlockSize, colBlockSize);
161 }
162 row_block_size = 1;
163 col_block_size = 1;
164 } else {
165 pattern = npattern->unrollBlocks(pattern_format_out, 1, 1);
166 row_block_size = rowBlockSize;
167 col_block_size = colBlockSize;
168 }
169 if (Esys_noError()) {
170 numRows = pattern->numOutput;
171 numCols = pattern->numInput;
172 }
173 }
174 if (Esys_noError()) {
175 if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
176 block_size = MIN(row_block_size, col_block_size);
177 } else {
178 block_size = row_block_size*col_block_size;
179 }
180 len = (size_t)(pattern->len)*(size_t)(block_size);
181
182 val=new double[len];
183 setValues(0.);
184 }
185 }
186
187 SparseMatrix::~SparseMatrix()
188 {
189 switch (solver_package) {
190 case PASO_SMOOTHER:
191 Preconditioner_LocalSmoother_free((Preconditioner_LocalSmoother*) solver_p);
192 break;
193
194 case PASO_MKL:
195 MKL_free(this);
196 break;
197
198 case PASO_UMFPACK:
199 UMFPACK_free(this);
200 break;
201 }
202 delete[] val;
203 }
204
205 SparseMatrix_ptr SparseMatrix::loadMM_toCSR(const char* filename)
206 {
207 FILE *fileHandle_p = NULL;
208 SparseMatrix_ptr out;
209 int i, scan_ret;
210 MM_typecode matrixCode;
211 Esys_resetError();
212
213 // open the file
214 fileHandle_p = fopen(filename, "r");
215 if (fileHandle_p == NULL) {
216 Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Cannot open file for reading.");
217 return out;
218 }
219
220 // process banner
221 if (mm_read_banner(fileHandle_p, &matrixCode) != 0) {
222 Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Error processing MM banner.");
223 fclose(fileHandle_p);
224 return out;
225 }
226 if (!(mm_is_real(matrixCode) && mm_is_sparse(matrixCode) && mm_is_general(matrixCode))) {
227 Esys_setError(TYPE_ERROR, "SparseMatrix::loadMM_toCSR: found Matrix Market type is not supported.");
228 fclose(fileHandle_p);
229 return out;
230 }
231
232 // get matrix size
233 int M, N, nz;
234
235 if (mm_read_mtx_crd_size(fileHandle_p, &M, &N, &nz) != 0)
236 {
237 Esys_setError(IO_ERROR, "SparseMatrix::loadMM_toCSR: Could not parse matrix size.");
238 fclose(fileHandle_p);
239 return out;
240 }
241
242 // prepare storage
243 index_t* col_ind = new index_t[nz];
244 index_t* row_ind = new index_t[nz];
245 index_t* row_ptr = new index_t[M+1];
246 double* val = new double[nz];
247
248 // perform actual read of elements
249 for (i=0; i<nz; i++) {
250 scan_ret = fscanf(fileHandle_p, "%d %d %le\n", &row_ind[i], &col_ind[i], &val[i]);
251 if (scan_ret != 3) {
252 delete[] val;
253 delete[] row_ind;
254 delete[] col_ind;
255 delete[] row_ptr;
256 fclose(fileHandle_p);
257 return out;
258 }
259 row_ind[i]--;
260 col_ind[i]--;
261 }
262 fclose(fileHandle_p);
263
264 // sort the entries
265 q_sort(row_ind, col_ind, val, 0, nz, N);
266
267 // setup row_ptr
268 int curr_row = 0;
269 for(i=0; (i<nz && curr_row<M); curr_row++) {
270 while(row_ind[i] != curr_row)
271 i++;
272 row_ptr[curr_row] = i;
273 }
274 row_ptr[M] = nz;
275
276 Pattern_ptr mainPattern(new Pattern(MATRIX_FORMAT_DEFAULT, M, N,
277 row_ptr, col_ind));
278 out.reset(new SparseMatrix(MATRIX_FORMAT_DEFAULT, mainPattern, 1, 1, true));
279
280 // copy values
281 for (i=0; i<nz; i++) out->val[i] = val[i];
282
283 delete[] val;
284 delete[] row_ind;
285 return out;
286 }
287
288 void SparseMatrix::saveMM(const char* filename) const
289 {
290 if (col_block_size != row_block_size) {
291 Esys_setError(TYPE_ERROR, "SparseMatrix::saveMM: currently only square blocks are supported.");
292 return;
293 }
294
295 // open the file
296 FILE* fileHandle_p = fopen(filename, "w");
297 if (fileHandle_p==NULL) {
298 Esys_setError(IO_ERROR,"SparseMatrix_saveMM: File could not be opened for writing");
299 return;
300 }
301 if (type & MATRIX_FORMAT_CSC) {
302 Esys_setError(TYPE_ERROR,"SparseMatrix_saveMM does not support CSC yet.");
303 } else {
304 MM_typecode matcode;
305 mm_initialize_typecode(&matcode);
306 mm_set_matrix(&matcode);
307 mm_set_coordinate(&matcode);
308 mm_set_real(&matcode);
309
310 const dim_t N = getNumRows();
311 const dim_t M = getNumCols();
312 mm_write_banner(fileHandle_p, matcode);
313 mm_write_mtx_crd_size(fileHandle_p, N*row_block_size,
314 M*col_block_size, pattern->ptr[N]*block_size);
315
316 const index_t offset=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
317
318 if (type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
319 for (dim_t i=0; i<N; i++) {
320 for (dim_t iptr = pattern->ptr[i]-offset; iptr<pattern->ptr[i+1]-offset; ++iptr) {
321 const dim_t j=pattern->index[iptr]-offset;
322 for (dim_t ib=0; ib<block_size; ib++) {
323 const dim_t irow=ib+row_block_size*i;
324 const dim_t icol=ib+col_block_size*j;
325 fprintf(fileHandle_p, "%d %d %25.15e\n", irow+1,
326 icol+1, val[iptr*block_size+ib]);
327 }
328 }
329 }
330 } else {
331 for (dim_t i=0; i<N; i++) {
332 for (dim_t iptr = pattern->ptr[i]-offset; iptr<pattern->ptr[i+1]-offset; ++iptr) {
333 const dim_t j=pattern->index[iptr]-offset;
334 for (dim_t irb=0; irb<row_block_size; irb++) {
335 const dim_t irow=irb+row_block_size*i;
336 for (dim_t icb=0; icb<col_block_size; icb++) {
337 const dim_t icol=icb+col_block_size*j;
338 fprintf(fileHandle_p, "%d %d %25.15e\n", irow+1,
339 icol+1, val[iptr*block_size+irb+row_block_size*icb]);
340 }
341 }
342 }
343 }
344 }
345 }
346 // close the file
347 fclose(fileHandle_p);
348 }
349
350 void SparseMatrix::addAbsRow_CSR_OFFSET0(double* array) const
351 {
352 const dim_t nOut = pattern->numOutput;
353 #pragma omp parallel for
354 for (dim_t ir=0; ir < nOut; ir++) {
355 for (dim_t irb=0; irb < row_block_size; irb++) {
356 const dim_t irow = irb+row_block_size*ir;
357 double fac=0.;
358 for (index_t iptr=pattern->ptr[ir]; iptr < pattern->ptr[ir+1]; iptr++) {
359 for (dim_t icb=0; icb < col_block_size; icb++) {
360 const index_t idx = iptr*block_size+irb+row_block_size*icb;
361 fac += ABS(val[idx]);
362 }
363 }
364 array[irow]+=fac;
365 }
366 }
367 }
368
369 void SparseMatrix::maxAbsRow_CSR_OFFSET0(double* array) const
370 {
371 const dim_t nOut = pattern->numOutput;
372 #pragma omp parallel for
373 for (dim_t ir=0; ir < nOut; ir++) {
374 for (dim_t irb=0; irb < row_block_size; irb++) {
375 const dim_t irow = irb+row_block_size*ir;
376 double fac=0.;
377 for (index_t iptr=pattern->ptr[ir]; iptr < pattern->ptr[ir+1]; iptr++) {
378 for (dim_t icb=0; icb < col_block_size; icb++) {
379 const index_t idx = iptr*block_size+irb+row_block_size*icb;
380 fac=MAX(fac, std::abs(val[idx]));
381 }
382 }
383 array[irow]=MAX(array[irow], fac);
384 }
385 }
386 }
387
388 void SparseMatrix::addRow_CSR_OFFSET0(double* array) const
389 {
390 const dim_t nOut = pattern->numOutput;
391 #pragma omp parallel for
392 for (dim_t ir=0; ir < nOut; ir++) {
393 for (dim_t irb=0; irb < row_block_size; irb++) {
394 dim_t irow=irb+row_block_size*ir;
395 double fac=0.;
396 for (index_t iptr=pattern->ptr[ir]; iptr<pattern->ptr[ir+1]; iptr++) {
397 for (dim_t icb=0; icb < col_block_size; icb++)
398 fac += val[iptr*block_size+irb+row_block_size*icb];
399
400 }
401 array[irow]+=fac;
402 }
403 }
404 }
405
406 void SparseMatrix::copyBlockToMainDiagonal(const double* in)
407 {
408 const dim_t n = pattern->numOutput;
409 const dim_t nblk = block_size;
410 const size_t nblk_size = sizeof(double)*nblk;
411 const index_t* main_ptr = borrowMainDiagonalPointer();
412 #pragma omp parallel for
413 for (index_t ir=0; ir < n;ir++) {
414 memcpy((void*)&val[main_ptr[ir]*nblk], (void*)&in[nblk*ir], nblk_size);
415 }
416 }
417
418 void SparseMatrix::copyBlockFromMainDiagonal(double* out) const
419 {
420 const dim_t n = pattern->numOutput;
421 const dim_t nblk = block_size;
422 const size_t nblk_size = sizeof(double)*nblk;
423 const index_t* main_ptr = borrowMainDiagonalPointer();
424 #pragma omp parallel for
425 for (index_t ir=0; ir < n; ir++) {
426 memcpy((void*)&out[nblk*ir], (void*)&val[main_ptr[ir]*nblk], nblk_size);
427 }
428 }
429
430 void SparseMatrix::copyFromMainDiagonal(double* out) const
431 {
432 const dim_t n = pattern->numOutput;
433 const dim_t nblk = block_size;
434 const dim_t blk = MIN(row_block_size, col_block_size);
435 const index_t* main_ptr = borrowMainDiagonalPointer();
436 #pragma omp parallel for
437 for (index_t ir=0; ir < n; ir++) {
438 for (index_t ib=0; ib < blk; ib++) {
439 out[ir*blk+ib] = val[main_ptr[ir]*nblk+ib+row_block_size*ib];
440 }
441 }
442 }
443
444 void SparseMatrix::copyToMainDiagonal(const double* in)
445 {
446 const dim_t n = pattern->numOutput;
447 const dim_t nblk = block_size;
448 const dim_t blk = MIN(row_block_size, col_block_size);
449 const index_t* main_ptr = borrowMainDiagonalPointer();
450 #pragma omp parallel for
451 for (index_t ir=0; ir < n; ir++) {
452 for (index_t ib=0; ib < blk; ib++) {
453 val[main_ptr[ir]*nblk+ib+row_block_size*ib] = in[ir*blk+ib];
454 }
455 }
456 }
457
458 void SparseMatrix::applyDiagonal_CSR_OFFSET0(const double* left,
459 const double* right)
460 {
461 const dim_t row_block = row_block_size;
462 const dim_t col_block = col_block_size;
463 const dim_t n_block = row_block*col_block;
464 const dim_t nOut = pattern->numOutput;
465
466 #pragma omp parallel for
467 for (index_t ir=0; ir < nOut; ir++) {
468 for (index_t irb=0; irb < row_block; irb++) {
469 const index_t irow = irb+row_block*ir;
470 const double rtmp = left[irow];
471 for (index_t iptr=pattern->ptr[ir]; iptr < pattern->ptr[ir+1]; iptr++) {
472 #pragma ivdep
473 for (index_t icb=0; icb < col_block_size; icb++) {
474 const index_t icol = icb+col_block*pattern->index[iptr];
475 const index_t l = iptr*n_block + irb+row_block*icb;
476 val[l] *= rtmp*right[icol];
477 }
478 }
479 }
480 }
481 }
482
483 void SparseMatrix::setValues(double value)
484 {
485 const index_t index_offset=(type & MATRIX_FORMAT_OFFSET1 ? 1:0);
486 if (!pattern->isEmpty()) {
487 const dim_t nOut = pattern->numOutput;
488 #pragma omp parallel for
489 for (dim_t i=0; i < nOut; ++i) {
490 for (index_t iptr=pattern->ptr[i]-index_offset; iptr < pattern->ptr[i+1]-index_offset; ++iptr) {
491 for (dim_t j=0; j<block_size; ++j)
492 val[iptr*block_size+j] = value;
493 }
494 }
495 }
496 }
497
498 void SparseMatrix::invMain(double* inv_diag, int* pivot) const
499 {
500 int failed = 0;
501 double A11;
502 const dim_t n=numRows;
503 const dim_t n_block=row_block_size;
504 const dim_t m_block=col_block_size;
505 dim_t i;
506 index_t iPtr;
507 index_t* main_ptr=pattern->borrowMainDiagonalPointer();
508 // check matrix is square
509 if (m_block != n_block) {
510 Esys_setError(TYPE_ERROR, "SparseMatrix::invMain: square block size expected.");
511 }
512 if (Esys_noError()) {
513 if (n_block == 1) {
514 #pragma omp parallel for private(i, iPtr, A11) schedule(static)
515 for (i = 0; i < n; i++) {
516 iPtr = main_ptr[i];
517 A11 = val[iPtr];
518 if (ABS(A11) > 0.) {
519 inv_diag[i]=1./A11;
520 } else {
521 failed=1;
522 }
523 }
524 } else if (n_block==2) {
525 #pragma omp parallel for private(i, iPtr) schedule(static)
526 for (i = 0; i < n; i++) {
527 iPtr = main_ptr[i];
528 BlockOps_invM_2(&inv_diag[i*4], &val[iPtr*4], &failed);
529 }
530 } else if (n_block==3) {
531 #pragma omp parallel for private(i, iPtr) schedule(static)
532 for (i = 0; i < n; i++) {
533 iPtr = main_ptr[i];
534 BlockOps_invM_3(&inv_diag[i*9], &val[iPtr*9], &failed);
535 }
536 } else {
537 #pragma omp parallel for private(i, iPtr) schedule(static)
538 for (i = 0; i < n; i++) {
539 iPtr = main_ptr[i];
540 BlockOps_Cpy_N(block_size, &inv_diag[i*block_size], &val[iPtr*block_size]);
541 BlockOps_invM_N(n_block, &inv_diag[i*block_size], &pivot[i*n_block], &failed);
542 }
543 }
544 }
545 if (failed > 0) {
546 Esys_setError(ZERO_DIVISION_ERROR, "SparseMatrix::invMain: non-regular main diagonal block.");
547 }
548 }
549
550 void SparseMatrix::applyBlockMatrix(double* block_diag, int* pivot, double* x,
551 const double *b) const
552 {
553 const dim_t n = numRows;
554 const dim_t n_block = row_block_size;
555 util::copy(n_block*n, x, b);
556 BlockOps_solveAll(n_block, n, block_diag, pivot, x);
557 }
558
559 SparseMatrix_ptr SparseMatrix::getTranspose() const
560 {
561 const dim_t m = numCols;
562 const dim_t n = numRows;
563 boost::scoped_array<IndexList> index_list(new IndexList[m]);
564
565 for (dim_t i=0; i<n; ++i) {
566 for (index_t iptr2=pattern->ptr[i]; iptr2<pattern->ptr[i+1]; ++iptr2) {
567 const index_t j = pattern->index[iptr2];
568 index_list[j].insertIndex(i);
569 }
570 }
571
572 Pattern_ptr ATpattern(Pattern::fromIndexListArray(0,m,index_list.get(),0,n,0));
573 SparseMatrix_ptr AT(new SparseMatrix(type, ATpattern, col_block_size, row_block_size, false));
574
575 if ( ((type & MATRIX_FORMAT_DIAGONAL_BLOCK) && (block_size == 1)) ||
576 (row_block_size == 1 && col_block_size == 1)) {
577 #pragma omp parallel for
578 for (dim_t i=0; i<m; ++i) {
579 for (index_t iptr_AT=AT->pattern->ptr[i]; iptr_AT<AT->pattern->ptr[i+1]; ++iptr_AT) {
580 const index_t j = AT->pattern->index[iptr_AT];
581 index_t jptr_A = pattern->ptr[j];
582 const index_t* start_p = &pattern->index[jptr_A];
583 const index_t* where_p=(index_t*)bsearch(&i, start_p,
584 pattern->ptr[j+1]-jptr_A,
585 sizeof(index_t), util::comparIndex);
586 if (where_p != NULL) { // this should always be the case
587 jptr_A += (index_t)(where_p-start_p);
588 AT->val[iptr_AT] = val[jptr_A];
589 }
590 }
591 }
592 } else {
593 if (type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
594 #pragma omp parallel for
595 for (dim_t i=0; i<m; ++i) {
596 for (index_t iptr_AT=AT->pattern->ptr[i]; iptr_AT<AT->pattern->ptr[i+1]; ++iptr_AT) {
597 const index_t j = AT->pattern->index[iptr_AT];
598 index_t jptr_A = pattern->ptr[j];
599 const index_t* start_p = &pattern->index[jptr_A];
600 const index_t* where_p = (index_t*)bsearch(&i, start_p,
601 pattern->ptr[j+1]-jptr_A,
602 sizeof(index_t), util::comparIndex);
603 if (where_p != NULL) { // this should always be the case
604 jptr_A += (index_t)(where_p-start_p);
605 for (dim_t ib=0; ib < block_size; ++ib)
606 AT->val[iptr_AT*block_size+ib] = val[jptr_A*block_size+ib];
607 }
608 }
609 }
610 } else {
611 #pragma omp parallel for
612 for (dim_t i=0; i<m; ++i) {
613 for (index_t iptr_AT=AT->pattern->ptr[i]; iptr_AT<AT->pattern->ptr[i+1]; ++iptr_AT) {
614 const index_t j = AT->pattern->index[iptr_AT];
615 index_t jptr_A = pattern->ptr[j];
616 const index_t* start_p = &pattern->index[jptr_A];
617 const index_t* where_p=(index_t*)bsearch(&i, start_p,
618 pattern->ptr[j + 1]-jptr_A,
619 sizeof(index_t), util::comparIndex);
620 if (where_p != NULL) { // this should always be the case
621 jptr_A += (index_t)(where_p-start_p);
622 for (index_t irb=0; irb < row_block_size; ++irb) {
623 for (index_t icb=0 ; icb < col_block_size; ++icb) {
624 AT->val[iptr_AT*block_size+icb+col_block_size*irb] = val[jptr_A*block_size+irb+row_block_size*icb];
625 }
626 }
627 }
628 }
629 }
630 }
631 }
632 return AT;
633 }
634
635 SparseMatrix_ptr SparseMatrix::unroll(SparseMatrixType newType) const
636 {
637 const index_t out_type = (newType & MATRIX_FORMAT_BLK1) ? newType : newType + MATRIX_FORMAT_BLK1;
638 SparseMatrix_ptr out(new SparseMatrix(out_type, pattern, row_block_size, col_block_size, false));
639
640 const dim_t n = numRows;
641 const index_t A_offset = (type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
642 const index_t out_offset = (out_type & MATRIX_FORMAT_OFFSET1 ? 1 : 0);
643
644 if (Esys_noError()) {
645 if (out->type & MATRIX_FORMAT_CSC) {
646 #pragma omp parallel for
647 for (dim_t i=0; i<n; ++i) {
648 for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
649 const index_t j = pattern->index[iptr]-A_offset;
650 for (dim_t icb=0; icb<col_block_size; ++icb) {
651 const index_t icol=j*col_block_size+icb;
652 const index_t* start_p=&out->pattern->index[out->pattern->ptr[icol]-out_offset];
653 const index_t l_col=out->pattern->ptr[icol+1]-out->pattern->ptr[icol];
654 for (dim_t irb=0; irb<row_block_size; ++irb) {
655 const index_t irow=row_block_size*i+irb+out_offset;
656 const index_t* where_p = (index_t*)bsearch(&irow,
657 start_p, l_col, sizeof(index_t),
658 util::comparIndex);
659 if (where_p != NULL)
660 out->val[out->pattern->ptr[icol]-out_offset+(index_t)(where_p-start_p)] =
661 val[block_size*iptr+irb+row_block_size*icb];
662 }
663 }
664 }
665 }
666 } else {
667 #pragma omp parallel for
668 for (dim_t i=0; i<n; ++i) {
669 for (index_t iptr=pattern->ptr[i]-A_offset; iptr<pattern->ptr[i+1]-A_offset; ++iptr) {
670 const index_t j = pattern->index[iptr]-A_offset;
671 for (dim_t irb=0; irb<row_block_size; ++irb) {
672 const index_t irow=row_block_size*i+irb;
673 const index_t* start_p = &out->pattern->index[out->pattern->ptr[irow]-out_offset];
674 const index_t l_row=out->pattern->ptr[irow+1]-out->pattern->ptr[irow];
675 for (dim_t icb=0; icb<col_block_size; ++icb) {
676 const index_t icol=j*col_block_size+icb+out_offset;
677 const index_t* where_p = (index_t*)bsearch(&icol,
678 start_p, l_row, sizeof(index_t),
679 util::comparIndex);
680 if (where_p != NULL)
681 out->val[out->pattern->ptr[irow]-out_offset+(index_t)(where_p-start_p)] =
682 val[block_size*iptr+irb+row_block_size*icb];
683 }
684 }
685 }
686 }
687 }
688 }
689 return out;
690 }
691
692 } // namespace paso
693

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/SparseMatrix.cpp:3531-3826 /branches/diaplayground/paso/src/SparseMatrix.cpp:4940-5147 /branches/lapack2681/paso/src/SparseMatrix.cpp:2682-2741 /branches/pasowrap/paso/src/SparseMatrix.cpp:3661-3674 /branches/py3_attempt2/paso/src/SparseMatrix.cpp:3871-3891 /branches/restext/paso/src/SparseMatrix.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/SparseMatrix.cpp:3669-3791 /branches/stage3.0/paso/src/SparseMatrix.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/SparseMatrix.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/SparseMatrix.cpp:3517-3974 /release/3.0/paso/src/SparseMatrix.cpp:2591-2601 /trunk/paso/src/SparseMatrix.cpp:4257-4344 /trunk/ripley/test/python/paso/src/SparseMatrix.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26