1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2009 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: AMG preconditioner */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: artak@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "Solver.h" |
27 |
#include "Options.h" |
28 |
#include "PasoUtil.h" |
29 |
#include "UMFPACK.h" |
30 |
#include "MKL.h" |
31 |
#include "SystemMatrix.h" |
32 |
#include "Pattern_coupling.h" |
33 |
|
34 |
/**************************************************************/ |
35 |
|
36 |
/* free all memory used by AMG */ |
37 |
|
38 |
void Paso_Solver_AMG_free(Paso_Solver_AMG * in) { |
39 |
if (in!=NULL) { |
40 |
Paso_Solver_AMG_free(in->AMG_of_Schur); |
41 |
Paso_Solver_Jacobi_free(in->GS); |
42 |
MEMFREE(in->inv_A_FF); |
43 |
MEMFREE(in->A_FF_pivot); |
44 |
Paso_SparseMatrix_free(in->A_FC); |
45 |
Paso_SparseMatrix_free(in->A_CF); |
46 |
Paso_SparseMatrix_free(in->A); |
47 |
MEMFREE(in->rows_in_F); |
48 |
MEMFREE(in->rows_in_C); |
49 |
MEMFREE(in->mask_F); |
50 |
MEMFREE(in->mask_C); |
51 |
MEMFREE(in->x_F); |
52 |
MEMFREE(in->b_F); |
53 |
MEMFREE(in->x_C); |
54 |
MEMFREE(in->b_C); |
55 |
#ifdef MKL |
56 |
#else |
57 |
#ifdef UMFPACK |
58 |
Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver)); |
59 |
#endif |
60 |
#endif |
61 |
in->solver=NULL; |
62 |
MEMFREE(in->b_C); |
63 |
MEMFREE(in); |
64 |
} |
65 |
} |
66 |
|
67 |
/**************************************************************/ |
68 |
|
69 |
/* constructs the block-block factorization of |
70 |
|
71 |
[ A_FF A_FC ] |
72 |
A_p= |
73 |
[ A_CF A_FF ] |
74 |
|
75 |
to |
76 |
|
77 |
[ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] |
78 |
[ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] |
79 |
|
80 |
|
81 |
where S=A_FF-ACF*invA_FF*A_FC within the shape of S |
82 |
|
83 |
then AMG is applied to S again until S becomes empty |
84 |
|
85 |
*/ |
86 |
Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) { |
87 |
Paso_Solver_AMG* out=NULL; |
88 |
Paso_Pattern* temp1=NULL; |
89 |
Paso_Pattern* temp2=NULL; |
90 |
bool_t verbose=options->verbose; |
91 |
dim_t n=A_p->numRows; |
92 |
dim_t n_block=A_p->row_block_size; |
93 |
index_t* mis_marker=NULL; |
94 |
index_t* counter=NULL; |
95 |
index_t iPtr,*index, *where_p, iPtr_s; |
96 |
dim_t i,k,j; |
97 |
Paso_SparseMatrix * schur=NULL; |
98 |
Paso_SparseMatrix * schur_withFillIn=NULL; |
99 |
double S=0; |
100 |
|
101 |
/*Make sure we have block sizes 1*/ |
102 |
if (A_p->col_block_size>1) { |
103 |
Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires column block size 1."); |
104 |
return NULL; |
105 |
} |
106 |
if (A_p->row_block_size>1) { |
107 |
Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires row block size 1."); |
108 |
return NULL; |
109 |
} |
110 |
out=MEMALLOC(1,Paso_Solver_AMG); |
111 |
/* identify independend set of rows/columns */ |
112 |
mis_marker=TMPMEMALLOC(n,index_t); |
113 |
counter=TMPMEMALLOC(n,index_t); |
114 |
if ( !( Paso_checkPtr(mis_marker) || Paso_checkPtr(counter) || Paso_checkPtr(out)) ) { |
115 |
out->AMG_of_Schur=NULL; |
116 |
out->inv_A_FF=NULL; |
117 |
out->A_FF_pivot=NULL; |
118 |
out->A_FC=NULL; |
119 |
out->A_CF=NULL; |
120 |
out->rows_in_F=NULL; |
121 |
out->rows_in_C=NULL; |
122 |
out->mask_F=NULL; |
123 |
out->mask_C=NULL; |
124 |
out->x_F=NULL; |
125 |
out->b_F=NULL; |
126 |
out->x_C=NULL; |
127 |
out->b_C=NULL; |
128 |
out->GS=NULL; |
129 |
out->A=Paso_SparseMatrix_getReference(A_p); |
130 |
out->GS=NULL; |
131 |
out->solver=NULL; |
132 |
/*out->GS=Paso_Solver_getGS(A_p,verbose);*/ |
133 |
out->level=level; |
134 |
|
135 |
if (level==0 || n<=options->min_coarse_matrix_size) { |
136 |
out->coarsest_level=TRUE; |
137 |
#ifdef MKL |
138 |
#else |
139 |
#ifdef UMFPACK |
140 |
#else |
141 |
out->GS=Paso_Solver_getJacobi(A_p); |
142 |
#endif |
143 |
#endif |
144 |
} else { |
145 |
out->coarsest_level=FALSE; |
146 |
out->GS=Paso_Solver_getJacobi(A_p); |
147 |
|
148 |
/* identify independend set of rows/columns */ |
149 |
#pragma omp parallel for private(i) schedule(static) |
150 |
for (i=0;i<n;++i) mis_marker[i]=-1; |
151 |
|
152 |
if (options->coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) { |
153 |
Paso_Pattern_coup(A_p,mis_marker,options->coarsening_threshold); |
154 |
} |
155 |
else if (options->coarsening_method == PASO_RUGE_STUEBEN_COARSENING) { |
156 |
Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold); |
157 |
} |
158 |
else if (options->coarsening_method == PASO_AGGREGATION_COARSENING) { |
159 |
Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold); |
160 |
} |
161 |
else { |
162 |
/*Default coarseneing*/ |
163 |
Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold); |
164 |
|
165 |
/*Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);*/ |
166 |
} |
167 |
|
168 |
if (Paso_noError()) { |
169 |
#pragma omp parallel for private(i) schedule(static) |
170 |
for (i = 0; i < n; ++i) counter[i]=mis_marker[i]; |
171 |
|
172 |
out->n=n; |
173 |
out->n_block=n_block; |
174 |
out->n_F=Paso_Util_cumsum(n,counter); |
175 |
out->mask_F=MEMALLOC(n,index_t); |
176 |
out->rows_in_F=MEMALLOC(out->n_F,index_t); |
177 |
out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double); |
178 |
out->A_FF_pivot=NULL; /* later use for block size>3 */ |
179 |
if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) { |
180 |
/* creates an index for F from mask */ |
181 |
#pragma omp parallel for private(i) schedule(static) |
182 |
for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1; |
183 |
#pragma omp parallel for private(i) schedule(static) |
184 |
for (i = 0; i < n; ++i) { |
185 |
if (mis_marker[i]) { |
186 |
out->rows_in_F[counter[i]]=i; |
187 |
out->mask_F[i]=counter[i]; |
188 |
} else { |
189 |
out->mask_F[i]=-1; |
190 |
} |
191 |
} |
192 |
|
193 |
/* Compute row-sum for getting rs(A_FF)^-1*/ |
194 |
#pragma omp parallel for private(i,iPtr,j,S) schedule(static) |
195 |
for (i = 0; i < out->n_F; ++i) { |
196 |
S=0; |
197 |
/*printf("%d : %d -> ",i, out->rows_in_F[i]);*/ |
198 |
for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) { |
199 |
j=A_p->pattern->index[iPtr]; |
200 |
/*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/ |
201 |
if (mis_marker[j] && j==out->rows_in_F[i]) |
202 |
S=A_p->val[iPtr]; |
203 |
} |
204 |
/*printf("-> %e\n",S);*/ |
205 |
out->inv_A_FF[i]=1./S; |
206 |
} |
207 |
} |
208 |
} |
209 |
|
210 |
if ( Paso_noError()) { |
211 |
/* if there are no nodes in the coarse level there is no more work to do */ |
212 |
out->n_C=n-out->n_F; |
213 |
|
214 |
/*if (out->n_F>500) */ |
215 |
out->rows_in_C=MEMALLOC(out->n_C,index_t); |
216 |
out->mask_C=MEMALLOC(n,index_t); |
217 |
if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) { |
218 |
/* creates an index for C from mask */ |
219 |
#pragma omp parallel for private(i) schedule(static) |
220 |
for (i = 0; i < n; ++i) counter[i]=! mis_marker[i]; |
221 |
Paso_Util_cumsum(n,counter); |
222 |
#pragma omp parallel for private(i) schedule(static) |
223 |
for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1; |
224 |
#pragma omp parallel for private(i) schedule(static) |
225 |
for (i = 0; i < n; ++i) { |
226 |
if (! mis_marker[i]) { |
227 |
out->rows_in_C[counter[i]]=i; |
228 |
out->mask_C[i]=counter[i]; |
229 |
} else { |
230 |
out->mask_C[i]=-1; |
231 |
} |
232 |
} |
233 |
} |
234 |
} |
235 |
if ( Paso_noError()) { |
236 |
/* get A_CF block: */ |
237 |
out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F); |
238 |
/* get A_FC block: */ |
239 |
out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C); |
240 |
/* get A_CC block: */ |
241 |
schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C); |
242 |
|
243 |
} |
244 |
if ( Paso_noError()) { |
245 |
/*find the pattern of the schur complement with fill in*/ |
246 |
temp1=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern); |
247 |
temp2=Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1); |
248 |
schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE); |
249 |
Paso_Pattern_free(temp1); |
250 |
Paso_Pattern_free(temp2); |
251 |
} |
252 |
if ( Paso_noError()) { |
253 |
/* copy values over*/ |
254 |
#pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static) |
255 |
for (i = 0; i < schur_withFillIn->numRows; ++i) { |
256 |
for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) { |
257 |
j=schur_withFillIn->pattern->index[iPtr]; |
258 |
iPtr_s=schur->pattern->ptr[i]; |
259 |
schur_withFillIn->val[iPtr]=0.; |
260 |
index=&(schur->pattern->index[iPtr_s]); |
261 |
where_p=(index_t*)bsearch(&j, |
262 |
index, |
263 |
schur->pattern->ptr[i + 1]-schur->pattern->ptr[i], |
264 |
sizeof(index_t), |
265 |
Paso_comparIndex); |
266 |
if (where_p!=NULL) { |
267 |
schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)]; |
268 |
} |
269 |
} |
270 |
} |
271 |
Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC); |
272 |
out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,level-1,options); |
273 |
} |
274 |
/* allocate work arrays for AMG application */ |
275 |
if (Paso_noError()) { |
276 |
out->x_F=MEMALLOC(n_block*out->n_F,double); |
277 |
out->b_F=MEMALLOC(n_block*out->n_F,double); |
278 |
out->x_C=MEMALLOC(n_block*out->n_C,double); |
279 |
out->b_C=MEMALLOC(n_block*out->n_C,double); |
280 |
|
281 |
if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) { |
282 |
#pragma omp parallel for private(i,k) schedule(static) |
283 |
for (i = 0; i < out->n_F; ++i) { |
284 |
for (k=0; k<n_block;++k) { |
285 |
out->x_F[i*n_block+k]=0.; |
286 |
out->b_F[i*n_block+k]=0.; |
287 |
} |
288 |
} |
289 |
#pragma omp parallel for private(i,k) schedule(static) |
290 |
for (i = 0; i < out->n_C; ++i) { |
291 |
for (k=0; k<n_block;++k) { |
292 |
out->x_C[i*n_block+k]=0.; |
293 |
out->b_C[i*n_block+k]=0.; |
294 |
} |
295 |
} |
296 |
} |
297 |
} |
298 |
} |
299 |
} |
300 |
TMPMEMFREE(mis_marker); |
301 |
TMPMEMFREE(counter); |
302 |
Paso_SparseMatrix_free(schur); |
303 |
Paso_SparseMatrix_free(schur_withFillIn); |
304 |
if (Paso_noError()) { |
305 |
if (verbose && level>0 && !out->coarsest_level) { |
306 |
printf("AMG: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C); |
307 |
} |
308 |
return out; |
309 |
} else { |
310 |
Paso_Solver_AMG_free(out); |
311 |
return NULL; |
312 |
} |
313 |
} |
314 |
|
315 |
/**************************************************************/ |
316 |
|
317 |
/* apply AMG precondition b-> x |
318 |
|
319 |
in fact it solves |
320 |
|
321 |
[ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F] |
322 |
[ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C] |
323 |
|
324 |
in the form |
325 |
|
326 |
b->[b_F,b_C] |
327 |
x_F=invA_FF*b_F |
328 |
b_C=b_C-A_CF*x_F |
329 |
x_C=AMG(b_C) |
330 |
b_F=b_F-A_FC*x_C |
331 |
x_F=invA_FF*b_F |
332 |
x<-[x_F,x_C] |
333 |
|
334 |
should be called within a parallel region |
335 |
barrier synconization should be performed to make sure that the input vector available |
336 |
|
337 |
*/ |
338 |
|
339 |
void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) { |
340 |
dim_t i; |
341 |
double time0=0; |
342 |
double *r=NULL, *x0=NULL; |
343 |
bool_t verbose=0; |
344 |
#ifdef MKL |
345 |
Paso_SparseMatrix *temp=NULL; |
346 |
#else |
347 |
#ifdef UMFPACK |
348 |
Paso_UMFPACK_Handler * ptr=NULL; |
349 |
#endif |
350 |
#endif |
351 |
r=MEMALLOC(amg->n,double); |
352 |
x0=MEMALLOC(amg->n,double); |
353 |
|
354 |
if (amg->coarsest_level) { |
355 |
|
356 |
time0=Paso_timer(); |
357 |
#ifdef MKL |
358 |
temp=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, amg->A->pattern,1,1, FALSE); |
359 |
#pragma omp parallel for private(i) schedule(static) |
360 |
for (i=0;i<amg->A->len;++i) { |
361 |
temp->val[i]=amg->A->val[i]; |
362 |
} |
363 |
Paso_MKL1(temp,x,b,0); |
364 |
Paso_SparseMatrix_free(temp); |
365 |
#else |
366 |
#ifdef UMFPACK |
367 |
ptr=(Paso_UMFPACK_Handler *)(amg->solver); |
368 |
Paso_UMFPACK1(&ptr,amg->A,x,b,0); |
369 |
amg->solver=(void*) ptr; |
370 |
#else |
371 |
Paso_Solver_solveJacobi(amg->GS,x,b); |
372 |
#endif |
373 |
#endif |
374 |
time0=Paso_timer()-time0; |
375 |
if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n\n\n",time0); |
376 |
|
377 |
} else { |
378 |
/* presmoothing */ |
379 |
time0=Paso_timer(); |
380 |
Paso_Solver_solveJacobi(amg->GS,x,b); |
381 |
time0=Paso_timer()-time0; |
382 |
if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0); |
383 |
/* end of presmoothing */ |
384 |
|
385 |
|
386 |
time0=Paso_timer(); |
387 |
#pragma omp parallel for private(i) schedule(static) |
388 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
389 |
|
390 |
/*r=b-Ax*/ |
391 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r); |
392 |
|
393 |
/* b->[b_F,b_C] */ |
394 |
#pragma omp parallel for private(i) schedule(static) |
395 |
for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]]; |
396 |
|
397 |
#pragma omp parallel for private(i) schedule(static) |
398 |
for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]]; |
399 |
|
400 |
/* x_F=invA_FF*b_F */ |
401 |
Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F); |
402 |
|
403 |
/* b_C=b_C-A_CF*x_F */ |
404 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C); |
405 |
|
406 |
time0=Paso_timer()-time0; |
407 |
if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0); |
408 |
|
409 |
/* x_C=AMG(b_C) */ |
410 |
Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C); |
411 |
|
412 |
time0=Paso_timer(); |
413 |
/* b_F=b_F-A_FC*x_C */ |
414 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F); |
415 |
/* x_F=invA_FF*b_F */ |
416 |
Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F); |
417 |
/* x<-[x_F,x_C] */ |
418 |
|
419 |
#pragma omp parallel for private(i) schedule(static) |
420 |
for (i=0;i<amg->n;++i) { |
421 |
if (amg->mask_C[i]>-1) { |
422 |
x[i]=amg->x_C[amg->mask_C[i]]; |
423 |
} else { |
424 |
x[i]=amg->x_F[amg->mask_F[i]]; |
425 |
} |
426 |
} |
427 |
|
428 |
time0=Paso_timer()-time0; |
429 |
if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0); |
430 |
|
431 |
/*postsmoothing*/ |
432 |
time0=Paso_timer(); |
433 |
#pragma omp parallel for private(i) schedule(static) |
434 |
for (i=0;i<amg->n;++i) r[i]=b[i]; |
435 |
|
436 |
/*r=b-Ax */ |
437 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r); |
438 |
Paso_Solver_solveJacobi(amg->GS,x0,r); |
439 |
|
440 |
#pragma omp parallel for private(i) schedule(static) |
441 |
for (i=0;i<amg->n;++i) x[i]+=x0[i]; |
442 |
|
443 |
time0=Paso_timer()-time0; |
444 |
if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0); |
445 |
|
446 |
/*end of postsmoothing*/ |
447 |
|
448 |
} |
449 |
MEMFREE(r); |
450 |
MEMFREE(x0); |
451 |
return; |
452 |
} |