1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: SystemMatrix */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "SystemMatrix.h" |
26 |
#include "Preconditioner.h" |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
/* allocates a SystemMatrix of type type using the given matrix pattern |
31 |
Values are initialized by zero. |
32 |
if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size |
33 |
and offsets otherwise unrolling and offset adjustment will be performed. |
34 |
*/ |
35 |
|
36 |
Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size, |
37 |
const bool_t patternIsUnrolled) { |
38 |
|
39 |
Paso_SystemMatrix*out=NULL; |
40 |
dim_t n_norm,i; |
41 |
Paso_SystemMatrixType pattern_format_out; |
42 |
bool_t unroll=FALSE; |
43 |
|
44 |
pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? PATTERN_FORMAT_OFFSET1: PATTERN_FORMAT_DEFAULT; |
45 |
Esys_resetError(); |
46 |
if (type & MATRIX_FORMAT_SYM) { |
47 |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: Symmetric matrix patterns are not supported."); |
48 |
return NULL; |
49 |
} |
50 |
if (patternIsUnrolled) { |
51 |
if ( ! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & PATTERN_FORMAT_OFFSET1) ) { |
52 |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: requested offset and pattern offset does not match."); |
53 |
return NULL; |
54 |
} |
55 |
} |
56 |
/* do we need to apply unrolling ? */ |
57 |
unroll |
58 |
/* we don't like non-square blocks */ |
59 |
= (row_block_size!=col_block_size) |
60 |
/* or any block size bigger than 3 */ |
61 |
|| (col_block_size>3) |
62 |
/* or if lock size one requested and the block size is not 1 */ |
63 |
|| ((type & MATRIX_FORMAT_BLK1) && (col_block_size>1) ) |
64 |
/* or the offsets are wrong */ |
65 |
|| ((type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & PATTERN_FORMAT_OFFSET1)); |
66 |
|
67 |
out=MEMALLOC(1,Paso_SystemMatrix); |
68 |
if (! Esys_checkPtr(out)) { |
69 |
out->type=type; |
70 |
out->pattern=NULL; |
71 |
out->row_distribution=NULL; |
72 |
out->col_distribution=NULL; |
73 |
out->mpi_info=Esys_MPIInfo_getReference(pattern->mpi_info); |
74 |
out->row_coupler=NULL; |
75 |
out->col_coupler=NULL; |
76 |
out->mainBlock=NULL; |
77 |
out->row_coupleBlock=NULL; |
78 |
out->col_coupleBlock=NULL; |
79 |
out->normalizer_is_valid=FALSE; |
80 |
out->normalizer=NULL; |
81 |
out->solver_package=PASO_PASO; |
82 |
out->solver_p=NULL; |
83 |
out->trilinos_data=NULL; |
84 |
out->reference_counter=1; |
85 |
out->logical_row_block_size=row_block_size; |
86 |
out->logical_col_block_size=col_block_size; |
87 |
|
88 |
|
89 |
if (type & MATRIX_FORMAT_CSC) { |
90 |
if (unroll) { |
91 |
if (patternIsUnrolled) { |
92 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
93 |
} else { |
94 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size); |
95 |
} |
96 |
out->row_block_size=1; |
97 |
out->col_block_size=1; |
98 |
} else { |
99 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
100 |
out->row_block_size=row_block_size; |
101 |
out->col_block_size=col_block_size; |
102 |
} |
103 |
if (Esys_noError()) { |
104 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
105 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
106 |
} |
107 |
} else { |
108 |
if (unroll) { |
109 |
if (patternIsUnrolled) { |
110 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
111 |
} else { |
112 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size); |
113 |
} |
114 |
out->row_block_size=1; |
115 |
out->col_block_size=1; |
116 |
} else { |
117 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
118 |
out->row_block_size=row_block_size; |
119 |
out->col_block_size=col_block_size; |
120 |
} |
121 |
if (Esys_noError()) { |
122 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
123 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
124 |
} |
125 |
} |
126 |
if (Esys_noError()) { |
127 |
out->block_size=out->row_block_size*out->col_block_size; |
128 |
out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size); |
129 |
out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size); |
130 |
/* this should be bypassed if trilinos is used */ |
131 |
if (type & MATRIX_FORMAT_TRILINOS_CRS) { |
132 |
#ifdef TRILINOS |
133 |
out->trilinos_data=Paso_TRILINOS_alloc(); |
134 |
#endif |
135 |
} else { |
136 |
out->solver_package=PASO_PASO; |
137 |
out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size,TRUE); |
138 |
out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size,TRUE); |
139 |
out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size,TRUE); |
140 |
if (Esys_noError()) { |
141 |
/* allocate memory for matrix entries */ |
142 |
if (type & MATRIX_FORMAT_CSC) { |
143 |
n_norm = out->mainBlock->numCols * out->col_block_size; |
144 |
} else { |
145 |
n_norm = out->mainBlock->numRows * out->row_block_size; |
146 |
} |
147 |
out->normalizer=MEMALLOC(n_norm,double); |
148 |
out->normalizer_is_valid=FALSE; |
149 |
if (! Esys_checkPtr(out->normalizer)) { |
150 |
#pragma omp parallel for private(i) schedule(static) |
151 |
for (i=0;i<n_norm;++i) out->normalizer[i]=0.; |
152 |
} |
153 |
} |
154 |
} |
155 |
} |
156 |
} |
157 |
/* all done: */ |
158 |
if (! Esys_noError()) { |
159 |
Paso_SystemMatrix_free(out); |
160 |
return NULL; |
161 |
} else { |
162 |
return out; |
163 |
} |
164 |
} |
165 |
|
166 |
/* returns a reference to Paso_SystemMatrix in */ |
167 |
|
168 |
Paso_SystemMatrix* Paso_SystemMatrix_getReference(Paso_SystemMatrix* in) { |
169 |
if (in!=NULL) ++(in->reference_counter); |
170 |
return in; |
171 |
} |
172 |
|
173 |
/* deallocates a SystemMatrix: */ |
174 |
|
175 |
void Paso_SystemMatrix_free(Paso_SystemMatrix* in) { |
176 |
if (in!=NULL) { |
177 |
in->reference_counter--; |
178 |
if (in->reference_counter<=0) { |
179 |
Paso_solve_free(in); |
180 |
Paso_SystemMatrixPattern_free(in->pattern); |
181 |
Paso_Distribution_free(in->row_distribution); |
182 |
Paso_Distribution_free(in->col_distribution); |
183 |
Esys_MPIInfo_free(in->mpi_info); |
184 |
Paso_Coupler_free(in->row_coupler); |
185 |
Paso_Coupler_free(in->col_coupler); |
186 |
Paso_SparseMatrix_free(in->mainBlock); |
187 |
Paso_SparseMatrix_free(in->col_coupleBlock); |
188 |
Paso_SparseMatrix_free(in->row_coupleBlock); |
189 |
MEMFREE(in->normalizer); |
190 |
Paso_solve_free(in); |
191 |
#ifdef TRILINOS |
192 |
Paso_TRILINOS_free(in->trilinos_data); |
193 |
#endif |
194 |
MEMFREE(in); |
195 |
#ifdef Paso_TRACE |
196 |
printf("Paso_SystemMatrix_free: system matrix as been deallocated.\n"); |
197 |
#endif |
198 |
} |
199 |
} |
200 |
} |
201 |
void Paso_SystemMatrix_startCollect(Paso_SystemMatrix* A,const double* in) |
202 |
{ |
203 |
Paso_SystemMatrix_startColCollect(A,in); |
204 |
} |
205 |
double* Paso_SystemMatrix_finishCollect(Paso_SystemMatrix* A) |
206 |
{ |
207 |
return Paso_SystemMatrix_finishColCollect(A); |
208 |
} |
209 |
|
210 |
void Paso_SystemMatrix_startColCollect(Paso_SystemMatrix* A,const double* in) |
211 |
{ |
212 |
Paso_Coupler_startCollect(A->col_coupler, in); |
213 |
} |
214 |
double* Paso_SystemMatrix_finishColCollect(Paso_SystemMatrix* A) |
215 |
{ |
216 |
Paso_Coupler_finishCollect(A->col_coupler); |
217 |
return A->col_coupler->recv_buffer; |
218 |
} |
219 |
void Paso_SystemMatrix_startRowCollect(Paso_SystemMatrix* A,const double* in) |
220 |
{ |
221 |
Paso_Coupler_startCollect(A->row_coupler, in); |
222 |
} |
223 |
double* Paso_SystemMatrix_finishRowCollect(Paso_SystemMatrix* A) |
224 |
{ |
225 |
Paso_Coupler_finishCollect(A->row_coupler); |
226 |
return A->row_coupler->recv_buffer; |
227 |
} |
228 |
|
229 |
dim_t Paso_SystemMatrix_getTotalNumRows(const Paso_SystemMatrix* A){ |
230 |
return A->mainBlock->numRows * A->row_block_size; |
231 |
} |
232 |
|
233 |
dim_t Paso_SystemMatrix_getTotalNumCols(const Paso_SystemMatrix* A){ |
234 |
return A->mainBlock->numCols * A->col_block_size; |
235 |
} |
236 |
dim_t Paso_SystemMatrix_getGlobalNumRows(Paso_SystemMatrix* A) { |
237 |
if (A->type & MATRIX_FORMAT_CSC) { |
238 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
239 |
} else { |
240 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
241 |
} |
242 |
} |
243 |
dim_t Paso_SystemMatrix_getGlobalNumCols(Paso_SystemMatrix* A) { |
244 |
if (A->type & MATRIX_FORMAT_CSC) { |
245 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
246 |
} else { |
247 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
248 |
} |
249 |
|
250 |
} |
251 |
dim_t Paso_SystemMatrix_getNumOutput(Paso_SystemMatrix* A) { |
252 |
return Paso_SystemMatrixPattern_getNumOutput(A->pattern); |
253 |
} |
254 |
|
255 |
index_t* Paso_SystemMatrix_borrowMainDiagonalPointer(Paso_SystemMatrix * A_p) |
256 |
{ |
257 |
index_t* out=NULL; |
258 |
int fail=0; |
259 |
out=Paso_SparseMatrix_borrowMainDiagonalPointer(A_p->mainBlock); |
260 |
if (out==NULL) fail=1; |
261 |
#ifdef ESYS_MPI |
262 |
{ |
263 |
int fail_loc = fail; |
264 |
MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, A_p->mpi_info->comm); |
265 |
} |
266 |
#endif |
267 |
if (fail>0) Esys_setError(VALUE_ERROR, "Paso_SystemMatrix_borrowMainDiagonalPointer: no main diagonal"); |
268 |
return out; |
269 |
} |
270 |
|
271 |
void Paso_SystemMatrix_makeZeroRowSums(Paso_SystemMatrix * A_p, double* left_over) |
272 |
{ |
273 |
index_t ir, ib, irow; |
274 |
register double rtmp1, rtmp2; |
275 |
const dim_t n = Paso_SystemMatrixPattern_getNumOutput(A_p->pattern); |
276 |
const dim_t nblk = A_p->block_size; |
277 |
const dim_t blk = A_p->row_block_size; |
278 |
const index_t* main_ptr=Paso_SystemMatrix_borrowMainDiagonalPointer(A_p); |
279 |
|
280 |
|
281 |
Paso_SystemMatrix_rowSum(A_p, left_over); /* left_over now hold the row sum */ |
282 |
|
283 |
#pragma omp parallel for private(ir,ib, rtmp1, rtmp2) schedule(static) |
284 |
for (ir=0;ir< n;ir++) { |
285 |
for (ib=0;ib<blk; ib++) { |
286 |
irow=ib+blk*ir; |
287 |
rtmp1=left_over[irow]; |
288 |
rtmp2=A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib]; |
289 |
A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = -rtmp1; |
290 |
left_over[irow]=rtmp2+rtmp1; |
291 |
} |
292 |
} |
293 |
} |
294 |
void Paso_SystemMatrix_copyBlockFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
295 |
{ |
296 |
Paso_SparseMatrix_copyBlockFromMainDiagonal(A_p->mainBlock, out); |
297 |
return; |
298 |
} |
299 |
void Paso_SystemMatrix_copyBlockToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
300 |
{ |
301 |
Paso_SparseMatrix_copyBlockToMainDiagonal(A_p->mainBlock, in); |
302 |
return; |
303 |
} |
304 |
void Paso_SystemMatrix_copyFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
305 |
{ |
306 |
Paso_SparseMatrix_copyFromMainDiagonal(A_p->mainBlock, out); |
307 |
return; |
308 |
} |
309 |
void Paso_SystemMatrix_copyToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
310 |
{ |
311 |
Paso_SparseMatrix_copyToMainDiagonal(A_p->mainBlock, in); |
312 |
return; |
313 |
} |
314 |
|
315 |
void Paso_SystemMatrix_setPreconditioner(Paso_SystemMatrix* A,Paso_Options* options) { |
316 |
if (A->solver_p==NULL) { |
317 |
A->solver_p=Paso_Preconditioner_alloc(A,options); |
318 |
} |
319 |
} |
320 |
|
321 |
/* applies the preconditioner */ |
322 |
/* has to be called within a parallel reqion */ |
323 |
/* barrier synchronization is performed before the evaluation to make sure that the input vector is available */ |
324 |
void Paso_SystemMatrix_solvePreconditioner(Paso_SystemMatrix* A,double* x,double* b){ |
325 |
Paso_Preconditioner* prec=(Paso_Preconditioner*) A->solver_p; |
326 |
Paso_Preconditioner_solve(prec, A,x,b); |
327 |
} |
328 |
void Paso_SystemMatrix_freePreconditioner(Paso_SystemMatrix* A) { |
329 |
Paso_Preconditioner* prec=NULL; |
330 |
if (A!=NULL) { |
331 |
prec=(Paso_Preconditioner*) A->solver_p; |
332 |
Paso_Preconditioner_free(prec); |
333 |
A->solver_p=NULL; |
334 |
} |
335 |
} |