1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: SystemMatrix */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "SystemMatrix.h" |
26 |
#include "Preconditioner.h" |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
/* allocates a SystemMatrix of type type using the given matrix pattern |
31 |
Values are initialized by zero. |
32 |
if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size |
33 |
and offsets otherwise unrolling and offset adjustment will be performed. |
34 |
*/ |
35 |
|
36 |
Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size, |
37 |
const bool_t patternIsUnrolled) { |
38 |
|
39 |
Paso_SystemMatrix*out=NULL; |
40 |
dim_t n_norm,i; |
41 |
Paso_SystemMatrixType pattern_format_out; |
42 |
bool_t unroll=FALSE; |
43 |
|
44 |
pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? MATRIX_FORMAT_OFFSET1: MATRIX_FORMAT_DEFAULT; |
45 |
Esys_resetError(); |
46 |
if (patternIsUnrolled) { |
47 |
if ( ! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & MATRIX_FORMAT_OFFSET1) ) { |
48 |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: requested offset and pattern offset does not match."); |
49 |
return NULL; |
50 |
} |
51 |
} |
52 |
/* do we need to apply unrolling ? */ |
53 |
unroll |
54 |
/* we don't like non-square blocks */ |
55 |
= (row_block_size!=col_block_size) |
56 |
/* or any block size bigger than 3 */ |
57 |
|| (col_block_size>3) |
58 |
/* or if lock size one requested and the block size is not 1 */ |
59 |
|| ((type & MATRIX_FORMAT_BLK1) && (col_block_size>1) ) |
60 |
/* or the offsets are wrong */ |
61 |
|| ((type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & MATRIX_FORMAT_OFFSET1)); |
62 |
|
63 |
out=MEMALLOC(1,Paso_SystemMatrix); |
64 |
if (! Esys_checkPtr(out)) { |
65 |
out->type=type; |
66 |
out->pattern=NULL; |
67 |
out->row_distribution=NULL; |
68 |
out->col_distribution=NULL; |
69 |
out->mpi_info=Esys_MPIInfo_getReference(pattern->mpi_info); |
70 |
out->row_coupler=NULL; |
71 |
out->col_coupler=NULL; |
72 |
out->mainBlock=NULL; |
73 |
out->row_coupleBlock=NULL; |
74 |
out->col_coupleBlock=NULL; |
75 |
out->normalizer_is_valid=FALSE; |
76 |
out->normalizer=NULL; |
77 |
out->solver_package=PASO_PASO; |
78 |
out->solver_p=NULL; |
79 |
out->trilinos_data=NULL; |
80 |
out->reference_counter=1; |
81 |
out->logical_row_block_size=row_block_size; |
82 |
out->logical_col_block_size=col_block_size; |
83 |
|
84 |
|
85 |
if (type & MATRIX_FORMAT_CSC) { |
86 |
printf("MATRIX_FORMAT_CSC\n"); |
87 |
if (unroll) { |
88 |
if (patternIsUnrolled) { |
89 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
90 |
} else { |
91 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size); |
92 |
} |
93 |
out->row_block_size=1; |
94 |
out->col_block_size=1; |
95 |
} else { |
96 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
97 |
out->row_block_size=row_block_size; |
98 |
out->col_block_size=col_block_size; |
99 |
} |
100 |
if (Esys_noError()) { |
101 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
102 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
103 |
} |
104 |
} else { |
105 |
if (unroll) { |
106 |
if (patternIsUnrolled) { |
107 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
108 |
} else { |
109 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size); |
110 |
} |
111 |
out->row_block_size=1; |
112 |
out->col_block_size=1; |
113 |
} else { |
114 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
115 |
out->row_block_size=row_block_size; |
116 |
out->col_block_size=col_block_size; |
117 |
} |
118 |
if (Esys_noError()) { |
119 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
120 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
121 |
} |
122 |
} |
123 |
if (Esys_noError()) { |
124 |
out->block_size=out->row_block_size*out->col_block_size; |
125 |
out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size); |
126 |
out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size); |
127 |
/* this should be bypassed if trilinos is used */ |
128 |
if (type & MATRIX_FORMAT_TRILINOS_CRS) { |
129 |
#ifdef TRILINOS |
130 |
out->trilinos_data=Paso_TRILINOS_alloc(); |
131 |
#endif |
132 |
} else { |
133 |
out->solver_package=PASO_PASO; |
134 |
out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size,TRUE); |
135 |
out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size,TRUE); |
136 |
out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size,TRUE); |
137 |
if (Esys_noError()) { |
138 |
/* allocate memory for matrix entries */ |
139 |
if (type & MATRIX_FORMAT_CSC) { |
140 |
n_norm = out->mainBlock->numCols * out->col_block_size; |
141 |
} else { |
142 |
n_norm = out->mainBlock->numRows * out->row_block_size; |
143 |
} |
144 |
out->normalizer=MEMALLOC(n_norm,double); |
145 |
out->normalizer_is_valid=FALSE; |
146 |
if (! Esys_checkPtr(out->normalizer)) { |
147 |
#pragma omp parallel for private(i) schedule(static) |
148 |
for (i=0;i<n_norm;++i) out->normalizer[i]=0.; |
149 |
} |
150 |
} |
151 |
} |
152 |
} |
153 |
} |
154 |
/* all done: */ |
155 |
if (! Esys_noError()) { |
156 |
Paso_SystemMatrix_free(out); |
157 |
return NULL; |
158 |
} else { |
159 |
return out; |
160 |
} |
161 |
} |
162 |
|
163 |
/* returns a reference to Paso_SystemMatrix in */ |
164 |
|
165 |
Paso_SystemMatrix* Paso_SystemMatrix_getReference(Paso_SystemMatrix* in) { |
166 |
if (in!=NULL) ++(in->reference_counter); |
167 |
return in; |
168 |
} |
169 |
|
170 |
/* deallocates a SystemMatrix: */ |
171 |
|
172 |
void Paso_SystemMatrix_free(Paso_SystemMatrix* in) { |
173 |
if (in!=NULL) { |
174 |
in->reference_counter--; |
175 |
if (in->reference_counter<=0) { |
176 |
Paso_solve_free(in); |
177 |
Paso_SystemMatrixPattern_free(in->pattern); |
178 |
Paso_Distribution_free(in->row_distribution); |
179 |
Paso_Distribution_free(in->col_distribution); |
180 |
Esys_MPIInfo_free(in->mpi_info); |
181 |
Paso_Coupler_free(in->row_coupler); |
182 |
Paso_Coupler_free(in->col_coupler); |
183 |
Paso_SparseMatrix_free(in->mainBlock); |
184 |
Paso_SparseMatrix_free(in->col_coupleBlock); |
185 |
Paso_SparseMatrix_free(in->row_coupleBlock); |
186 |
MEMFREE(in->normalizer); |
187 |
Paso_solve_free(in); |
188 |
#ifdef TRILINOS |
189 |
Paso_TRILINOS_free(in->trilinos_data); |
190 |
#endif |
191 |
MEMFREE(in); |
192 |
#ifdef Paso_TRACE |
193 |
printf("Paso_SystemMatrix_free: system matrix as been deallocated.\n"); |
194 |
#endif |
195 |
} |
196 |
} |
197 |
} |
198 |
void Paso_SystemMatrix_startCollect(Paso_SystemMatrix* A,const double* in) |
199 |
{ |
200 |
Paso_SystemMatrix_startColCollect(A,in); |
201 |
} |
202 |
double* Paso_SystemMatrix_finishCollect(Paso_SystemMatrix* A) |
203 |
{ |
204 |
return Paso_SystemMatrix_finishColCollect(A); |
205 |
} |
206 |
|
207 |
void Paso_SystemMatrix_startColCollect(Paso_SystemMatrix* A,const double* in) |
208 |
{ |
209 |
Paso_Coupler_startCollect(A->col_coupler, in); |
210 |
} |
211 |
double* Paso_SystemMatrix_finishColCollect(Paso_SystemMatrix* A) |
212 |
{ |
213 |
Paso_Coupler_finishCollect(A->col_coupler); |
214 |
return A->col_coupler->recv_buffer; |
215 |
} |
216 |
void Paso_SystemMatrix_startRowCollect(Paso_SystemMatrix* A,const double* in) |
217 |
{ |
218 |
Paso_Coupler_startCollect(A->row_coupler, in); |
219 |
} |
220 |
double* Paso_SystemMatrix_finishRowCollect(Paso_SystemMatrix* A) |
221 |
{ |
222 |
Paso_Coupler_finishCollect(A->row_coupler); |
223 |
return A->row_coupler->recv_buffer; |
224 |
} |
225 |
|
226 |
dim_t Paso_SystemMatrix_getTotalNumRows(const Paso_SystemMatrix* A){ |
227 |
return A->mainBlock->numRows * A->row_block_size; |
228 |
} |
229 |
|
230 |
dim_t Paso_SystemMatrix_getTotalNumCols(const Paso_SystemMatrix* A){ |
231 |
return A->mainBlock->numCols * A->col_block_size; |
232 |
} |
233 |
dim_t Paso_SystemMatrix_getGlobalNumRows(Paso_SystemMatrix* A) { |
234 |
if (A->type & MATRIX_FORMAT_CSC) { |
235 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
236 |
} else { |
237 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
238 |
} |
239 |
} |
240 |
dim_t Paso_SystemMatrix_getGlobalNumCols(Paso_SystemMatrix* A) { |
241 |
if (A->type & MATRIX_FORMAT_CSC) { |
242 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
243 |
} else { |
244 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
245 |
} |
246 |
|
247 |
} |
248 |
dim_t Paso_SystemMatrix_getNumOutput(Paso_SystemMatrix* A) { |
249 |
return Paso_SystemMatrixPattern_getNumOutput(A->pattern); |
250 |
} |
251 |
|
252 |
index_t* Paso_SystemMatrix_borrowMainDiagonalPointer(Paso_SystemMatrix * A_p) |
253 |
{ |
254 |
index_t* out=NULL; |
255 |
int fail=0; |
256 |
out=Paso_SparseMatrix_borrowMainDiagonalPointer(A_p->mainBlock); |
257 |
if (out==NULL) fail=1; |
258 |
#ifdef ESYS_MPI |
259 |
{ |
260 |
int fail_loc = fail; |
261 |
MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, A_p->mpi_info->comm); |
262 |
} |
263 |
#endif |
264 |
if (fail>0) Esys_setError(VALUE_ERROR, "Paso_SystemMatrix_borrowMainDiagonalPointer: no main diagonal"); |
265 |
return out; |
266 |
} |
267 |
|
268 |
void Paso_SystemMatrix_makeZeroRowSums(Paso_SystemMatrix * A_p, double* left_over) |
269 |
{ |
270 |
index_t ir, ib, irow; |
271 |
register double rtmp1, rtmp2; |
272 |
const dim_t n = Paso_SystemMatrixPattern_getNumOutput(A_p->pattern); |
273 |
const dim_t nblk = A_p->block_size; |
274 |
const dim_t blk = A_p->row_block_size; |
275 |
const index_t* main_ptr=Paso_SystemMatrix_borrowMainDiagonalPointer(A_p); |
276 |
|
277 |
|
278 |
Paso_SystemMatrix_rowSum(A_p, left_over); /* left_over now hold the row sum */ |
279 |
|
280 |
#pragma omp parallel for private(ir,ib, rtmp1, rtmp2) schedule(static) |
281 |
for (ir=0;ir< n;ir++) { |
282 |
for (ib=0;ib<blk; ib++) { |
283 |
irow=ib+blk*ir; |
284 |
rtmp1=left_over[irow]; |
285 |
rtmp2=A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib]; |
286 |
A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = -rtmp1; |
287 |
left_over[irow]=rtmp2+rtmp1; |
288 |
} |
289 |
} |
290 |
} |
291 |
void Paso_SystemMatrix_copyBlockFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
292 |
{ |
293 |
Paso_SparseMatrix_copyBlockFromMainDiagonal(A_p->mainBlock, out); |
294 |
return; |
295 |
} |
296 |
void Paso_SystemMatrix_copyBlockToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
297 |
{ |
298 |
Paso_SparseMatrix_copyBlockToMainDiagonal(A_p->mainBlock, in); |
299 |
return; |
300 |
} |
301 |
void Paso_SystemMatrix_copyFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
302 |
{ |
303 |
Paso_SparseMatrix_copyFromMainDiagonal(A_p->mainBlock, out); |
304 |
return; |
305 |
} |
306 |
void Paso_SystemMatrix_copyToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
307 |
{ |
308 |
Paso_SparseMatrix_copyToMainDiagonal(A_p->mainBlock, in); |
309 |
return; |
310 |
} |
311 |
|
312 |
void Paso_SystemMatrix_setPreconditioner(Paso_SystemMatrix* A,Paso_Options* options) { |
313 |
if (A->solver_p==NULL) { |
314 |
A->solver_p=Paso_Preconditioner_alloc(A,options); |
315 |
} |
316 |
} |
317 |
|
318 |
/* applies the preconditioner */ |
319 |
/* has to be called within a parallel reqion */ |
320 |
/* barrier synchronization is performed before the evaluation to make sure that the input vector is available */ |
321 |
void Paso_SystemMatrix_solvePreconditioner(Paso_SystemMatrix* A,double* x,double* b){ |
322 |
Paso_Preconditioner* prec=(Paso_Preconditioner*) A->solver_p; |
323 |
Paso_Preconditioner_solve(prec, A,x,b); |
324 |
} |
325 |
void Paso_SystemMatrix_freePreconditioner(Paso_SystemMatrix* A) { |
326 |
Paso_Preconditioner* prec=NULL; |
327 |
if (A!=NULL) { |
328 |
prec=(Paso_Preconditioner*) A->solver_p; |
329 |
Paso_Preconditioner_free(prec); |
330 |
A->solver_p=NULL; |
331 |
} |
332 |
} |