1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: SystemMatrix */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "SystemMatrix.h" |
26 |
#include "Preconditioner.h" |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
/* allocates a SystemMatrix of type type using the given matrix pattern |
31 |
Values are initialized by zero. |
32 |
if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size |
33 |
and offsets otherwise unrolling and offset adjustment will be performed. |
34 |
*/ |
35 |
|
36 |
Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size, |
37 |
const bool_t patternIsUnrolled) { |
38 |
|
39 |
Paso_SystemMatrix*out=NULL; |
40 |
dim_t n_norm,i; |
41 |
Paso_SystemMatrixType pattern_format_out; |
42 |
bool_t unroll=FALSE; |
43 |
|
44 |
pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? MATRIX_FORMAT_OFFSET1: MATRIX_FORMAT_DEFAULT; |
45 |
Esys_resetError(); |
46 |
if (patternIsUnrolled) { |
47 |
if ( ! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & MATRIX_FORMAT_OFFSET1) ) { |
48 |
Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: requested offset and pattern offset does not match."); |
49 |
return NULL; |
50 |
} |
51 |
} |
52 |
/* do we need to apply unrolling ? */ |
53 |
unroll |
54 |
/* we don't like non-square blocks */ |
55 |
= (row_block_size!=col_block_size) |
56 |
/* or any block size bigger than 3 */ |
57 |
|| (col_block_size>3) |
58 |
/* or if lock size one requested and the block size is not 1 */ |
59 |
|| ((type & MATRIX_FORMAT_BLK1) && (col_block_size>1) ) |
60 |
/* or the offsets are wrong */ |
61 |
|| ((type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & MATRIX_FORMAT_OFFSET1)); |
62 |
|
63 |
out=MEMALLOC(1,Paso_SystemMatrix); |
64 |
if (! Esys_checkPtr(out)) { |
65 |
out->type=type; |
66 |
out->pattern=NULL; |
67 |
out->row_distribution=NULL; |
68 |
out->col_distribution=NULL; |
69 |
out->mpi_info=Esys_MPIInfo_getReference(pattern->mpi_info); |
70 |
out->row_coupler=NULL; |
71 |
out->col_coupler=NULL; |
72 |
out->mainBlock=NULL; |
73 |
out->row_coupleBlock=NULL; |
74 |
out->col_coupleBlock=NULL; |
75 |
out->is_balanced=FALSE; |
76 |
out->balance_vector=NULL; |
77 |
out->solver_package=PASO_PASO; |
78 |
out->solver_p=NULL; |
79 |
out->trilinos_data=NULL; |
80 |
out->reference_counter=1; |
81 |
out->logical_row_block_size=row_block_size; |
82 |
out->logical_col_block_size=col_block_size; |
83 |
|
84 |
|
85 |
if (type & MATRIX_FORMAT_CSC) { |
86 |
if (unroll) { |
87 |
if (patternIsUnrolled) { |
88 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
89 |
} else { |
90 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size); |
91 |
} |
92 |
out->row_block_size=1; |
93 |
out->col_block_size=1; |
94 |
} else { |
95 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
96 |
out->row_block_size=row_block_size; |
97 |
out->col_block_size=col_block_size; |
98 |
} |
99 |
if (Esys_noError()) { |
100 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
101 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
102 |
} |
103 |
} else { |
104 |
if (unroll) { |
105 |
if (patternIsUnrolled) { |
106 |
out->pattern=Paso_SystemMatrixPattern_getReference(pattern); |
107 |
} else { |
108 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size); |
109 |
} |
110 |
out->row_block_size=1; |
111 |
out->col_block_size=1; |
112 |
} else { |
113 |
out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1); |
114 |
out->row_block_size=row_block_size; |
115 |
out->col_block_size=col_block_size; |
116 |
} |
117 |
if (Esys_noError()) { |
118 |
out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution); |
119 |
out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution); |
120 |
} |
121 |
} |
122 |
if (Esys_noError()) { |
123 |
out->block_size=out->row_block_size*out->col_block_size; |
124 |
out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size); |
125 |
out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size); |
126 |
/* this should be bypassed if trilinos is used */ |
127 |
if (type & MATRIX_FORMAT_TRILINOS_CRS) { |
128 |
#ifdef TRILINOS |
129 |
out->trilinos_data=Paso_TRILINOS_alloc(); |
130 |
#endif |
131 |
} else { |
132 |
out->solver_package=PASO_PASO; |
133 |
out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size,TRUE); |
134 |
out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size,TRUE); |
135 |
out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size,TRUE); |
136 |
if (Esys_noError()) { |
137 |
/* allocate memory for matrix entries */ |
138 |
n_norm = MAX(out->mainBlock->numCols * out->col_block_size, out->mainBlock->numRows * out->row_block_size); |
139 |
out->balance_vector=MEMALLOC(n_norm,double); |
140 |
out->is_balanced=FALSE; |
141 |
if (! Esys_checkPtr(out->balance_vector)) { |
142 |
#pragma omp parallel for private(i) schedule(static) |
143 |
for (i=0;i<n_norm;++i) out->balance_vector[i]=1.; |
144 |
} |
145 |
} |
146 |
} |
147 |
} |
148 |
} |
149 |
/* all done: */ |
150 |
if (! Esys_noError()) { |
151 |
Paso_SystemMatrix_free(out); |
152 |
return NULL; |
153 |
} else { |
154 |
return out; |
155 |
} |
156 |
} |
157 |
|
158 |
/* returns a reference to Paso_SystemMatrix in */ |
159 |
|
160 |
Paso_SystemMatrix* Paso_SystemMatrix_getReference(Paso_SystemMatrix* in) { |
161 |
if (in!=NULL) ++(in->reference_counter); |
162 |
return in; |
163 |
} |
164 |
|
165 |
/* deallocates a SystemMatrix: */ |
166 |
|
167 |
void Paso_SystemMatrix_free(Paso_SystemMatrix* in) { |
168 |
if (in!=NULL) { |
169 |
in->reference_counter--; |
170 |
if (in->reference_counter<=0) { |
171 |
Paso_solve_free(in); |
172 |
Paso_SystemMatrixPattern_free(in->pattern); |
173 |
Paso_Distribution_free(in->row_distribution); |
174 |
Paso_Distribution_free(in->col_distribution); |
175 |
Esys_MPIInfo_free(in->mpi_info); |
176 |
Paso_Coupler_free(in->row_coupler); |
177 |
Paso_Coupler_free(in->col_coupler); |
178 |
Paso_SparseMatrix_free(in->mainBlock); |
179 |
Paso_SparseMatrix_free(in->col_coupleBlock); |
180 |
Paso_SparseMatrix_free(in->row_coupleBlock); |
181 |
MEMFREE(in->balance_vector); |
182 |
Paso_solve_free(in); |
183 |
#ifdef TRILINOS |
184 |
Paso_TRILINOS_free(in->trilinos_data); |
185 |
#endif |
186 |
MEMFREE(in); |
187 |
#ifdef Paso_TRACE |
188 |
printf("Paso_SystemMatrix_free: system matrix as been deallocated.\n"); |
189 |
#endif |
190 |
} |
191 |
} |
192 |
} |
193 |
void Paso_SystemMatrix_startCollect(Paso_SystemMatrix* A,const double* in) |
194 |
{ |
195 |
Paso_SystemMatrix_startColCollect(A,in); |
196 |
} |
197 |
double* Paso_SystemMatrix_finishCollect(Paso_SystemMatrix* A) |
198 |
{ |
199 |
return Paso_SystemMatrix_finishColCollect(A); |
200 |
} |
201 |
|
202 |
void Paso_SystemMatrix_startColCollect(Paso_SystemMatrix* A,const double* in) |
203 |
{ |
204 |
Paso_Coupler_startCollect(A->col_coupler, in); |
205 |
} |
206 |
double* Paso_SystemMatrix_finishColCollect(Paso_SystemMatrix* A) |
207 |
{ |
208 |
Paso_Coupler_finishCollect(A->col_coupler); |
209 |
return A->col_coupler->recv_buffer; |
210 |
} |
211 |
void Paso_SystemMatrix_startRowCollect(Paso_SystemMatrix* A,const double* in) |
212 |
{ |
213 |
Paso_Coupler_startCollect(A->row_coupler, in); |
214 |
} |
215 |
double* Paso_SystemMatrix_finishRowCollect(Paso_SystemMatrix* A) |
216 |
{ |
217 |
Paso_Coupler_finishCollect(A->row_coupler); |
218 |
return A->row_coupler->recv_buffer; |
219 |
} |
220 |
|
221 |
dim_t Paso_SystemMatrix_getTotalNumRows(const Paso_SystemMatrix* A){ |
222 |
return A->mainBlock->numRows * A->row_block_size; |
223 |
} |
224 |
|
225 |
dim_t Paso_SystemMatrix_getTotalNumCols(const Paso_SystemMatrix* A){ |
226 |
return A->mainBlock->numCols * A->col_block_size; |
227 |
} |
228 |
dim_t Paso_SystemMatrix_getGlobalNumRows(Paso_SystemMatrix* A) { |
229 |
if (A->type & MATRIX_FORMAT_CSC) { |
230 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
231 |
} else { |
232 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
233 |
} |
234 |
} |
235 |
dim_t Paso_SystemMatrix_getGlobalNumCols(Paso_SystemMatrix* A) { |
236 |
if (A->type & MATRIX_FORMAT_CSC) { |
237 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->output_distribution); |
238 |
} else { |
239 |
return Paso_Distribution_getGlobalNumComponents(A->pattern->input_distribution); |
240 |
} |
241 |
|
242 |
} |
243 |
dim_t Paso_SystemMatrix_getNumOutput(Paso_SystemMatrix* A) { |
244 |
return Paso_SystemMatrixPattern_getNumOutput(A->pattern); |
245 |
} |
246 |
|
247 |
index_t* Paso_SystemMatrix_borrowMainDiagonalPointer(Paso_SystemMatrix * A_p) |
248 |
{ |
249 |
index_t* out=NULL; |
250 |
int fail=0; |
251 |
out=Paso_SparseMatrix_borrowMainDiagonalPointer(A_p->mainBlock); |
252 |
if (out==NULL) fail=1; |
253 |
#ifdef ESYS_MPI |
254 |
{ |
255 |
int fail_loc = fail; |
256 |
MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, A_p->mpi_info->comm); |
257 |
} |
258 |
#endif |
259 |
if (fail>0) Esys_setError(VALUE_ERROR, "Paso_SystemMatrix_borrowMainDiagonalPointer: no main diagonal"); |
260 |
return out; |
261 |
} |
262 |
|
263 |
void Paso_SystemMatrix_makeZeroRowSums(Paso_SystemMatrix * A_p, double* left_over) |
264 |
{ |
265 |
index_t ir, ib, irow; |
266 |
register double rtmp1, rtmp2; |
267 |
const dim_t n = Paso_SystemMatrixPattern_getNumOutput(A_p->pattern); |
268 |
const dim_t nblk = A_p->block_size; |
269 |
const dim_t blk = A_p->row_block_size; |
270 |
const index_t* main_ptr=Paso_SystemMatrix_borrowMainDiagonalPointer(A_p); |
271 |
|
272 |
|
273 |
Paso_SystemMatrix_rowSum(A_p, left_over); /* left_over now hold the row sum */ |
274 |
|
275 |
#pragma omp parallel for private(ir,ib, rtmp1, rtmp2) schedule(static) |
276 |
for (ir=0;ir< n;ir++) { |
277 |
for (ib=0;ib<blk; ib++) { |
278 |
irow=ib+blk*ir; |
279 |
rtmp1=left_over[irow]; |
280 |
rtmp2=A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib]; |
281 |
A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = -rtmp1; |
282 |
left_over[irow]=rtmp2+rtmp1; |
283 |
} |
284 |
} |
285 |
} |
286 |
void Paso_SystemMatrix_copyBlockFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
287 |
{ |
288 |
Paso_SparseMatrix_copyBlockFromMainDiagonal(A_p->mainBlock, out); |
289 |
return; |
290 |
} |
291 |
void Paso_SystemMatrix_copyBlockToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
292 |
{ |
293 |
Paso_SparseMatrix_copyBlockToMainDiagonal(A_p->mainBlock, in); |
294 |
return; |
295 |
} |
296 |
void Paso_SystemMatrix_copyFromMainDiagonal(Paso_SystemMatrix * A_p, double* out) |
297 |
{ |
298 |
Paso_SparseMatrix_copyFromMainDiagonal(A_p->mainBlock, out); |
299 |
return; |
300 |
} |
301 |
void Paso_SystemMatrix_copyToMainDiagonal(Paso_SystemMatrix * A_p, const double* in) |
302 |
{ |
303 |
Paso_SparseMatrix_copyToMainDiagonal(A_p->mainBlock, in); |
304 |
return; |
305 |
} |
306 |
|
307 |
void Paso_SystemMatrix_setPreconditioner(Paso_SystemMatrix* A,Paso_Options* options) { |
308 |
if (A->solver_p==NULL) { |
309 |
A->solver_p=Paso_Preconditioner_alloc(A,options); |
310 |
} |
311 |
} |
312 |
|
313 |
/* applies the preconditioner */ |
314 |
/* has to be called within a parallel reqion */ |
315 |
/* barrier synchronization is performed before the evaluation to make sure that the input vector is available */ |
316 |
void Paso_SystemMatrix_solvePreconditioner(Paso_SystemMatrix* A,double* x,double* b){ |
317 |
Paso_Preconditioner* prec=(Paso_Preconditioner*) A->solver_p; |
318 |
Paso_Preconditioner_solve(prec, A,x,b); |
319 |
} |
320 |
void Paso_SystemMatrix_freePreconditioner(Paso_SystemMatrix* A) { |
321 |
Paso_Preconditioner* prec=NULL; |
322 |
if (A!=NULL) { |
323 |
prec=(Paso_Preconditioner*) A->solver_p; |
324 |
Paso_Preconditioner_free(prec); |
325 |
A->solver_p=NULL; |
326 |
} |
327 |
} |