31 |
#include "Paso.h" |
#include "Paso.h" |
32 |
#include "SystemMatrix.h" |
#include "SystemMatrix.h" |
33 |
|
|
34 |
double* Paso_SystemMatrix_borrowNormalization(Paso_SystemMatrix* A) { |
void Paso_SystemMatrix_applyBalanceInPlace(const Paso_SystemMatrix* A, double* x, const bool_t RHS) |
35 |
dim_t irow, nrow; |
{ |
36 |
index_t irow_failed, irow_failed_local; |
const dim_t nrow=A->mainBlock->numRows*A->row_block_size; |
37 |
|
const dim_t ncol=A->mainBlock->numCols*A->col_block_size; |
38 |
|
index_t i; |
39 |
|
|
40 |
|
if (A->is_balanced) { |
41 |
|
if (RHS) { |
42 |
|
#pragma omp parallel for private(i) schedule(static) |
43 |
|
for (i=0; i<nrow ; ++i) { |
44 |
|
x[i]*=A->balance_vector[i]; |
45 |
|
} |
46 |
|
} else { |
47 |
|
#pragma omp parallel for private(i) schedule(static) |
48 |
|
for (i=0; i<ncol ; ++i) { |
49 |
|
x[i]*=A->balance_vector[i]; |
50 |
|
} |
51 |
|
} |
52 |
|
} |
53 |
|
return; |
54 |
|
} |
55 |
|
|
56 |
|
void Paso_SystemMatrix_applyBalance(const Paso_SystemMatrix* A, double* x_out, const double* x, const bool_t RHS) |
57 |
|
{ |
58 |
|
const dim_t nrow=A->mainBlock->numRows*A->row_block_size; |
59 |
|
const dim_t ncol=A->mainBlock->numCols*A->col_block_size; |
60 |
|
index_t i; |
61 |
|
|
62 |
|
if (A->is_balanced) { |
63 |
|
if (RHS) { |
64 |
|
#pragma omp parallel for private(i) schedule(static) |
65 |
|
for (i=0; i<nrow ; ++i) { |
66 |
|
x_out[i]=x[i]*A->balance_vector[i]; |
67 |
|
} |
68 |
|
} else { |
69 |
|
#pragma omp parallel for private(i) schedule(static) |
70 |
|
for (i=0; i<ncol ; ++i) { |
71 |
|
x_out[i]=x[i]*A->balance_vector[i]; |
72 |
|
} |
73 |
|
} |
74 |
|
} |
75 |
|
return; |
76 |
|
} |
77 |
|
|
78 |
|
void Paso_SystemMatrix_balance(Paso_SystemMatrix* A) { |
79 |
|
dim_t irow; |
80 |
|
const dim_t nrow=A->mainBlock->numRows*A->row_block_size; |
81 |
register double fac; |
register double fac; |
82 |
if (!A->normalizer_is_valid) { |
if (!A->is_balanced) { |
83 |
if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1)) { |
if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1)) { |
84 |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_borrowNormalization: No normalization available for compressed sparse column or index offset 1."); |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_balance: No normalization available for compressed sparse column or index offset 1."); |
85 |
} else { |
} |
86 |
if (Esys_checkPtr(A->normalizer)) { |
if (Esys_checkPtr(A->balance_vector)) { |
87 |
Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_borrowNormalization: no memory alloced for normalizer."); |
Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_balance: no memory alloced for balance vector."); |
88 |
|
} |
89 |
|
if ( ! ( (Paso_SystemMatrix_getGlobalNumRows(A) == Paso_SystemMatrix_getGlobalNumCols(A)) && (A->row_block_size == A->col_block_size) ) ) { |
90 |
|
Esys_setError(SYSTEM_ERROR,"Paso_SystemMatrix_balance: matrix needs to ba a square matrix."); |
91 |
|
} |
92 |
|
if (Esys_noError() ) { |
93 |
|
|
94 |
} else { |
/* calculat absolute max value over each row */ |
|
nrow=A->mainBlock->numRows*A->row_block_size; |
|
95 |
#pragma omp parallel for private(irow) schedule(static) |
#pragma omp parallel for private(irow) schedule(static) |
96 |
for (irow=0; irow<nrow ; ++irow) { |
for (irow=0; irow<nrow ; ++irow) { |
97 |
A->normalizer[irow]=0; |
A->balance_vector[irow]=0; |
98 |
} |
} |
99 |
Paso_SparseMatrix_addAbsRow_CSR_OFFSET0(A->mainBlock,A->normalizer); |
Paso_SparseMatrix_maxAbsRow_CSR_OFFSET0(A->mainBlock,A->balance_vector); |
100 |
if(A->col_coupleBlock->pattern->ptr!=NULL) { |
if(A->col_coupleBlock->pattern->ptr!=NULL) { |
101 |
Paso_SparseMatrix_addAbsRow_CSR_OFFSET0(A->col_coupleBlock,A->normalizer); |
Paso_SparseMatrix_maxAbsRow_CSR_OFFSET0(A->col_coupleBlock,A->balance_vector); |
102 |
} |
} |
103 |
|
|
104 |
|
/* set balancing vector */ |
105 |
#pragma omp parallel |
#pragma omp parallel |
106 |
{ |
{ |
|
irow_failed_local=-1; |
|
107 |
#pragma omp for private(irow,fac) schedule(static) |
#pragma omp for private(irow,fac) schedule(static) |
108 |
for (irow=0; irow<nrow ; ++irow) { |
for (irow=0; irow<nrow ; ++irow) { |
109 |
fac=A->normalizer[irow]; |
fac=A->balance_vector[irow]; |
110 |
if (ABS(fac)>0) { |
if (fac>0) { |
111 |
A->normalizer[irow]=1./fac; |
A->balance_vector[irow]=sqrt(1./fac); |
112 |
} else { |
} else { |
113 |
A->normalizer[irow]=1.; |
A->balance_vector[irow]=1.; |
|
irow_failed=irow; |
|
114 |
} |
} |
115 |
} |
} |
|
#pragma omp critical |
|
|
irow_failed=irow_failed_local; |
|
116 |
} |
} |
117 |
if (irow_failed>=0) { |
{ |
118 |
Esys_setError(ZERO_DIVISION_ERROR,"There is a row containing zero entries only."); |
/* rescale matrix: */ |
119 |
} |
double *remote_values=NULL; |
120 |
A->normalizer_is_valid=TRUE; |
/* start exchange */ |
121 |
|
Paso_SystemMatrix_startCollect(A, A->balance_vector); |
122 |
|
/* process main block */ |
123 |
|
Paso_SparseMatrix_applyDiagonal_CSR_OFFSET0(A->mainBlock,A->balance_vector, A->balance_vector); |
124 |
|
/* finish exchange */ |
125 |
|
remote_values=Paso_SystemMatrix_finishCollect(A); |
126 |
|
/* process couple block */ |
127 |
|
if (A->col_coupleBlock->pattern->ptr!=NULL) { |
128 |
|
Paso_SparseMatrix_applyDiagonal_CSR_OFFSET0(A->col_coupleBlock,A->balance_vector, remote_values); |
129 |
|
} |
130 |
|
if (A->row_coupleBlock->pattern->ptr!=NULL) { |
131 |
|
Paso_SparseMatrix_applyDiagonal_CSR_OFFSET0(A->row_coupleBlock, remote_values, A->balance_vector); |
132 |
|
} |
133 |
|
} |
134 |
|
A->is_balanced=TRUE; |
135 |
} |
} |
136 |
} |
} |
|
Esys_MPIInfo_noError(A->mpi_info ); |
|
|
} |
|
|
return A->normalizer; |
|
137 |
} |
} |