1 |
|
2 |
/***************************************************************************** |
3 |
* |
4 |
* Copyright (c) 2003-2020 by The University of Queensland |
5 |
* http://www.uq.edu.au |
6 |
* |
7 |
* Primary Business: Queensland, Australia |
8 |
* Licensed under the Apache License, version 2.0 |
9 |
* http://www.apache.org/licenses/LICENSE-2.0 |
10 |
* |
11 |
* Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
* Development 2012-2013 by School of Earth Sciences |
13 |
* Development from 2014-2017 by Centre for Geoscience Computing (GeoComp) |
14 |
* Development from 2019 by School of Earth and Environmental Sciences |
15 |
** |
16 |
*****************************************************************************/ |
17 |
|
18 |
|
19 |
/****************************************************************************/ |
20 |
|
21 |
/* Paso: SystemMatrix */ |
22 |
|
23 |
/****************************************************************************/ |
24 |
|
25 |
/* Copyrights by ACcESS Australia 2003,2004,2005,2006 */ |
26 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
27 |
|
28 |
/****************************************************************************/ |
29 |
|
30 |
#ifndef __PASO_SYSTEMMATRIX_H__ |
31 |
#define __PASO_SYSTEMMATRIX_H__ |
32 |
|
33 |
#include "SparseMatrix.h" |
34 |
#include "SystemMatrixPattern.h" |
35 |
|
36 |
#include <escript/AbstractSystemMatrix.h> |
37 |
|
38 |
namespace paso { |
39 |
|
40 |
struct Options; |
41 |
class SystemMatrix; |
42 |
typedef boost::shared_ptr<SystemMatrix> SystemMatrix_ptr; |
43 |
typedef boost::shared_ptr<const SystemMatrix> const_SystemMatrix_ptr; |
44 |
|
45 |
typedef int SystemMatrixType; |
46 |
|
47 |
/// this class holds a (distributed) stiffness matrix |
48 |
class PASO_DLL_API SystemMatrix : public escript::AbstractSystemMatrix |
49 |
{ |
50 |
public: |
51 |
/// default constructor - throws exception. |
52 |
SystemMatrix(); |
53 |
|
54 |
SystemMatrix(SystemMatrixType type, SystemMatrixPattern_ptr pattern, |
55 |
dim_t rowBlockSize, dim_t columnBlockSize, |
56 |
bool patternIsUnrolled, const escript::FunctionSpace& rowFS, |
57 |
const escript::FunctionSpace& colFS); |
58 |
|
59 |
~SystemMatrix(); |
60 |
|
61 |
/// Nullifies rows and columns in the matrix. |
62 |
/// The rows and columns are marked by positive values in mask_row and |
63 |
/// mask_col. Values on the main diagonal which are marked to set to |
64 |
/// zero by both mask_row and mask_col are set to main_diagonal_value. |
65 |
virtual void nullifyRowsAndCols(escript::Data& mask_row, |
66 |
escript::Data& mask_col, |
67 |
double main_diagonal_value); |
68 |
|
69 |
virtual inline void saveMM(const std::string& filename) const |
70 |
{ |
71 |
if (mpi_info->size > 1) { |
72 |
//throw PasoException("SystemMatrix::saveMM: Only single rank supported."); |
73 |
SparseMatrix_ptr merged(mergeSystemMatrix()); |
74 |
if (mpi_info->rank == 0) |
75 |
merged->saveMM(filename.c_str()); |
76 |
} else { |
77 |
mainBlock->saveMM(filename.c_str()); |
78 |
} |
79 |
} |
80 |
|
81 |
virtual inline void saveHB(const std::string& filename) const |
82 |
{ |
83 |
if (mpi_info->size > 1) { |
84 |
throw PasoException("SystemMatrix::saveHB: Only single rank supported."); |
85 |
} else if (!(type & MATRIX_FORMAT_CSC)) { |
86 |
throw PasoException("SystemMatrix::saveHB: Only CSC format supported."); |
87 |
} else { |
88 |
mainBlock->saveHB_CSC(filename.c_str()); |
89 |
} |
90 |
} |
91 |
|
92 |
virtual void resetValues(bool preserveSolverData = false); |
93 |
|
94 |
/// Nullifies rows in the matrix. |
95 |
/// The rows are marked by positive values in mask_row. Values on the |
96 |
/// main diagonal which are marked to set to zero by mask_row are set |
97 |
/// to main_diagonal_value. |
98 |
void nullifyRows(double* mask_row, double main_diagonal_value); |
99 |
|
100 |
void add(dim_t, index_t*, dim_t, dim_t, index_t*, dim_t, double*); |
101 |
|
102 |
void makeZeroRowSums(double* left_over); |
103 |
|
104 |
/// copies the col_coupleBlock into row_coupleBlock. |
105 |
/// WARNING: this method uses mpi_requests of the coupler attached to the |
106 |
/// matrix. No reordering on the received columns is performed. |
107 |
/// In practice this means that components in |
108 |
/// row_coupleBlock->pattern->index and |
109 |
/// row_coupler->connector->recv->shared |
110 |
/// are ordered by increasing value. |
111 |
/// Note that send and receive row_coupler->connectors are swapping roles. |
112 |
void copyColCoupleBlock(); |
113 |
|
114 |
void copyRemoteCoupleBlock(bool recreatePattern); |
115 |
|
116 |
void fillWithGlobalCoordinates(double f1); |
117 |
|
118 |
void print() const; |
119 |
|
120 |
/// Merges the system matrix which is distributed on several MPI ranks |
121 |
/// into a complete sparse matrix on rank 0. Used by the Merged Solver. |
122 |
SparseMatrix_ptr mergeSystemMatrix() const; |
123 |
|
124 |
void mergeMainAndCouple(index_t** p_ptr, index_t** p_idx, double** p_val) const; |
125 |
|
126 |
void mergeMainAndCouple_CSR_OFFSET0(index_t** p_ptr, index_t** p_idx, double** p_val) const; |
127 |
void mergeMainAndCouple_CSR_OFFSET0_Block(index_t** p_ptr, index_t** p_idx, double** p_val) const; |
128 |
|
129 |
void mergeMainAndCouple_CSC_OFFSET1(index_t** p_ptr, index_t** p_idx, double** p_val) const; |
130 |
|
131 |
void copyMain_CSC_OFFSET1(index_t** p_ptr, index_t** p_idx, double** p_val); |
132 |
|
133 |
void extendedRowsForST(dim_t* degree_ST, index_t* offset_ST, index_t* ST); |
134 |
|
135 |
void applyBalanceInPlace(double* x, bool RHS) const; |
136 |
|
137 |
void applyBalance(double* x_out, const double* x, bool RHS) const; |
138 |
|
139 |
void balance(); |
140 |
|
141 |
double getGlobalSize() const; |
142 |
|
143 |
void setPreconditioner(Options* options); |
144 |
|
145 |
/// Applies the preconditioner. |
146 |
/// This method needs to be called within a parallel region. |
147 |
/// Barrier synchronization is performed before the evaluation to make |
148 |
/// sure that the input vector is available |
149 |
void solvePreconditioner(double* x, double* b); |
150 |
|
151 |
void freePreconditioner(); |
152 |
|
153 |
index_t* borrowMainDiagonalPointer() const; |
154 |
|
155 |
inline void startCollect(const double* in) const |
156 |
{ |
157 |
startColCollect(in); |
158 |
} |
159 |
|
160 |
inline double* finishCollect() const |
161 |
{ |
162 |
return finishColCollect(); |
163 |
} |
164 |
|
165 |
inline void startColCollect(const double* in) const |
166 |
{ |
167 |
col_coupler->startCollect(in); |
168 |
} |
169 |
|
170 |
inline double* finishColCollect() const |
171 |
{ |
172 |
return col_coupler->finishCollect(); |
173 |
} |
174 |
|
175 |
inline void startRowCollect(const double* in) |
176 |
{ |
177 |
row_coupler->startCollect(in); |
178 |
} |
179 |
|
180 |
inline double* finishRowCollect() |
181 |
{ |
182 |
return row_coupler->finishCollect(); |
183 |
} |
184 |
|
185 |
inline dim_t getNumRows() const |
186 |
{ |
187 |
return mainBlock->numRows; |
188 |
} |
189 |
|
190 |
inline dim_t getNumCols() const |
191 |
{ |
192 |
return mainBlock->numCols; |
193 |
} |
194 |
|
195 |
inline dim_t getTotalNumRows() const |
196 |
{ |
197 |
return getNumRows() * row_block_size; |
198 |
} |
199 |
|
200 |
inline dim_t getTotalNumCols() const |
201 |
{ |
202 |
return getNumCols() * col_block_size; |
203 |
} |
204 |
|
205 |
inline dim_t getRowOverlap() const |
206 |
{ |
207 |
return row_coupler->getNumOverlapComponents(); |
208 |
} |
209 |
|
210 |
inline dim_t getColOverlap() const |
211 |
{ |
212 |
return col_coupler->getNumOverlapComponents(); |
213 |
} |
214 |
|
215 |
inline dim_t getGlobalNumRows() const |
216 |
{ |
217 |
if (type & MATRIX_FORMAT_CSC) { |
218 |
return pattern->input_distribution->getGlobalNumComponents(); |
219 |
} |
220 |
return pattern->output_distribution->getGlobalNumComponents(); |
221 |
} |
222 |
|
223 |
inline dim_t getGlobalNumCols() const |
224 |
{ |
225 |
if (type & MATRIX_FORMAT_CSC) { |
226 |
return pattern->output_distribution->getGlobalNumComponents(); |
227 |
} |
228 |
return pattern->input_distribution->getGlobalNumComponents(); |
229 |
} |
230 |
|
231 |
inline dim_t getGlobalTotalNumRows() const |
232 |
{ |
233 |
return getGlobalNumRows() * row_block_size; |
234 |
} |
235 |
|
236 |
inline dim_t getGlobalTotalNumCols() const |
237 |
{ |
238 |
return getGlobalNumCols() * col_block_size; |
239 |
} |
240 |
|
241 |
inline double getSparsity() const |
242 |
{ |
243 |
return getGlobalSize() / |
244 |
((double)getGlobalTotalNumRows()*getGlobalTotalNumCols()); |
245 |
} |
246 |
|
247 |
inline dim_t getNumOutput() const |
248 |
{ |
249 |
return pattern->getNumOutput(); |
250 |
} |
251 |
|
252 |
inline void copyBlockFromMainDiagonal(double* out) const |
253 |
{ |
254 |
mainBlock->copyBlockFromMainDiagonal(out); |
255 |
} |
256 |
|
257 |
inline void copyBlockToMainDiagonal(const double* in) |
258 |
{ |
259 |
mainBlock->copyBlockToMainDiagonal(in); |
260 |
} |
261 |
|
262 |
inline void copyFromMainDiagonal(double* out) const |
263 |
{ |
264 |
mainBlock->copyFromMainDiagonal(out); |
265 |
} |
266 |
|
267 |
inline void copyToMainDiagonal(const double* in) |
268 |
{ |
269 |
mainBlock->copyToMainDiagonal(in); |
270 |
} |
271 |
|
272 |
inline void setValues(double value) |
273 |
{ |
274 |
mainBlock->setValues(value); |
275 |
col_coupleBlock->setValues(value); |
276 |
row_coupleBlock->setValues(value); |
277 |
is_balanced = false; |
278 |
} |
279 |
|
280 |
inline void rowSum(double* row_sum) const |
281 |
{ |
282 |
if ((type & MATRIX_FORMAT_CSC) || (type & MATRIX_FORMAT_OFFSET1)) { |
283 |
throw PasoException("SystemMatrix::rowSum: No normalization " |
284 |
"available for compressed sparse column or index offset 1."); |
285 |
} else { |
286 |
const dim_t nrow = mainBlock->numRows*row_block_size; |
287 |
#pragma omp parallel for |
288 |
for (index_t irow=0; irow<nrow; ++irow) { |
289 |
row_sum[irow]=0.; |
290 |
} |
291 |
mainBlock->addRow_CSR_OFFSET0(row_sum); |
292 |
col_coupleBlock->addRow_CSR_OFFSET0(row_sum); |
293 |
} |
294 |
} |
295 |
|
296 |
void MatrixVector(double alpha, const double* in, double beta, |
297 |
double* out) const; |
298 |
|
299 |
void MatrixVector(double alpha, const cplx_t* in, double beta, |
300 |
cplx_t* out) const; |
301 |
|
302 |
void MatrixVector_CSR_OFFSET0(double alpha, const double* in, double beta, |
303 |
double* out) const; |
304 |
|
305 |
static SystemMatrix_ptr loadMM_toCSR(const char* filename); |
306 |
|
307 |
static SystemMatrix_ptr loadMM_toCSC(const char* filename); |
308 |
|
309 |
static int getSystemMatrixTypeId(int solver, int preconditioner, |
310 |
int package, bool symmetry, |
311 |
const escript::JMPI& mpi_info); |
312 |
|
313 |
SystemMatrixType type; |
314 |
SystemMatrixPattern_ptr pattern; |
315 |
|
316 |
dim_t logical_row_block_size; |
317 |
dim_t logical_col_block_size; |
318 |
|
319 |
dim_t row_block_size; |
320 |
dim_t col_block_size; |
321 |
dim_t block_size; |
322 |
|
323 |
escript::Distribution_ptr row_distribution; |
324 |
escript::Distribution_ptr col_distribution; |
325 |
escript::JMPI mpi_info; |
326 |
|
327 |
Coupler_ptr<real_t> col_coupler; |
328 |
Coupler_ptr<real_t> row_coupler; |
329 |
|
330 |
/// main block |
331 |
SparseMatrix_ptr mainBlock; |
332 |
/// coupling to neighbouring processors (row - col) |
333 |
SparseMatrix_ptr col_coupleBlock; |
334 |
/// coupling to neighbouring processors (col - row) |
335 |
SparseMatrix_ptr row_coupleBlock; |
336 |
/// coupling of rows-cols on neighbouring processors (may not be valid) |
337 |
SparseMatrix_ptr remote_coupleBlock; |
338 |
|
339 |
bool is_balanced; |
340 |
|
341 |
/// matrix may be balanced by a diagonal matrix D=diagonal(balance_vector) |
342 |
/// if is_balanced is true, the matrix stored is D*A*D where A is the |
343 |
/// original matrix. |
344 |
/// When the system of linear equations is solved we solve D*A*D*y=c. |
345 |
/// So to solve A*x=b one needs to set c=D*b and x=D*y. |
346 |
double* balance_vector; |
347 |
|
348 |
/// stores the global ids for all cols in col_coupleBlock |
349 |
mutable index_t* global_id; |
350 |
|
351 |
/// package code controlling the solver pointer |
352 |
mutable index_t solver_package; |
353 |
|
354 |
/// pointer to data needed by a solver |
355 |
void* solver_p; |
356 |
|
357 |
private: |
358 |
virtual void setToSolution(escript::Data& out, escript::Data& in, |
359 |
boost::python::object& options) const; |
360 |
|
361 |
virtual void ypAx(escript::Data& y, escript::Data& x) const; |
362 |
|
363 |
void solve(double* out, double* in, Options* options) const; |
364 |
|
365 |
void solve(cplx_t* out, cplx_t* in, Options* options) const; |
366 |
}; |
367 |
|
368 |
|
369 |
void RHS_loadMM_toCSR(const char* filename, double* b, dim_t size); |
370 |
|
371 |
|
372 |
} // namespace paso |
373 |
|
374 |
#endif // __PASO_SYSTEMMATRIX_H__ |
375 |
|