/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2726 - (hide annotations)
Wed Oct 21 23:50:05 2009 UTC (11 years, 6 months ago) by artak
File MIME type: text/plain
File size: 19172 byte(s)
New diagonal dominancy based algorithm is added. The coarseneng strategy is changed.
1 jfenwick 1851
2     /*******************************************************
3     *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 jfenwick 1851 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 artak 2475 /* Paso: AMG preconditioner */
18 jfenwick 1851
19     /**************************************************************/
20    
21 jfenwick 2625 /* Author: artak@uq.edu.au */
22 jfenwick 1851
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "Solver.h"
27 artak 2475 #include "Options.h"
28 jfenwick 1851 #include "PasoUtil.h"
29 artak 1931 #include "UMFPACK.h"
30 artak 2507 #include "MKL.h"
31     #include "SystemMatrix.h"
32 phornby 1917 #include "Pattern_coupling.h"
33 jfenwick 1851
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38 artak 2659 void Paso_Solver_AMG_System_free(Paso_Solver_AMG_System * in) {
39 artak 2662 dim_t i;
40 artak 2659 if (in!=NULL) {
41 artak 2662 for (i=0;i<in->block_size;++i) {
42     Paso_Solver_AMG_free(in->amgblock[i]);
43 artak 2670 Paso_SparseMatrix_free(in->block[i]);
44 artak 2662 }
45 artak 2659 MEMFREE(in);
46     }
47     }
48    
49    
50     /* free all memory used by AMG */
51    
52 jfenwick 1851 void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
53     if (in!=NULL) {
54 artak 2112 Paso_Solver_Jacobi_free(in->GS);
55 jfenwick 1851 MEMFREE(in->inv_A_FF);
56     MEMFREE(in->A_FF_pivot);
57     Paso_SparseMatrix_free(in->A_FC);
58     Paso_SparseMatrix_free(in->A_CF);
59 artak 2556 Paso_SparseMatrix_free(in->A);
60 artak 2711 if(in->coarsest_level==TRUE) {
61     #ifdef MKL
62     Paso_MKL_free1(in->AOffset1);
63     Paso_SparseMatrix_free(in->AOffset1);
64     #else
65     #ifdef UMFPACK
66     Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
67     #endif
68     #endif
69     }
70 jfenwick 1851 MEMFREE(in->rows_in_F);
71     MEMFREE(in->rows_in_C);
72     MEMFREE(in->mask_F);
73     MEMFREE(in->mask_C);
74     MEMFREE(in->x_F);
75     MEMFREE(in->b_F);
76     MEMFREE(in->x_C);
77     MEMFREE(in->b_C);
78 gross 2551 in->solver=NULL;
79 artak 2699 Paso_Solver_AMG_free(in->AMG_of_Schur);
80 gross 2551 MEMFREE(in->b_C);
81 jfenwick 1851 MEMFREE(in);
82     }
83     }
84    
85     /**************************************************************/
86    
87     /* constructs the block-block factorization of
88    
89     [ A_FF A_FC ]
90     A_p=
91     [ A_CF A_FF ]
92    
93     to
94    
95     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
96     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
97    
98    
99     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
100    
101     then AMG is applied to S again until S becomes empty
102    
103     */
104 artak 2520 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
105 jfenwick 1851 Paso_Solver_AMG* out=NULL;
106 artak 2556 Paso_Pattern* temp1=NULL;
107     Paso_Pattern* temp2=NULL;
108 artak 2520 bool_t verbose=options->verbose;
109 jfenwick 1851 dim_t n=A_p->numRows;
110     dim_t n_block=A_p->row_block_size;
111     index_t* mis_marker=NULL;
112 artak 1881 index_t* counter=NULL;
113     index_t iPtr,*index, *where_p, iPtr_s;
114 artak 2686 dim_t i,j;
115 jfenwick 1851 Paso_SparseMatrix * schur=NULL;
116 artak 1881 Paso_SparseMatrix * schur_withFillIn=NULL;
117 artak 2381 double S=0;
118 artak 2475
119 artak 2726
120     /* char filename[8];
121     sprintf(filename,"AMGLevel%d",level);
122    
123     Paso_SparseMatrix_saveMM(A_p,filename);
124     */
125    
126 artak 2439 /*Make sure we have block sizes 1*/
127 gross 2550 if (A_p->col_block_size>1) {
128     Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires column block size 1.");
129     return NULL;
130     }
131     if (A_p->row_block_size>1) {
132     Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires row block size 1.");
133     return NULL;
134     }
135 gross 2551 out=MEMALLOC(1,Paso_Solver_AMG);
136 jfenwick 1851 /* identify independend set of rows/columns */
137     mis_marker=TMPMEMALLOC(n,index_t);
138     counter=TMPMEMALLOC(n,index_t);
139 gross 2551 if ( !( Paso_checkPtr(mis_marker) || Paso_checkPtr(counter) || Paso_checkPtr(out)) ) {
140     out->AMG_of_Schur=NULL;
141     out->inv_A_FF=NULL;
142     out->A_FF_pivot=NULL;
143     out->A_FC=NULL;
144     out->A_CF=NULL;
145     out->rows_in_F=NULL;
146     out->rows_in_C=NULL;
147     out->mask_F=NULL;
148     out->mask_C=NULL;
149     out->x_F=NULL;
150     out->b_F=NULL;
151     out->x_C=NULL;
152     out->b_C=NULL;
153     out->GS=NULL;
154     out->A=Paso_SparseMatrix_getReference(A_p);
155     out->GS=NULL;
156     out->solver=NULL;
157     /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
158     out->level=level;
159 artak 2661 out->n=n;
160 artak 2726 out->n_F=n+1;
161 artak 2661 out->n_block=n_block;
162 artak 2556
163 gross 2551 if (level==0 || n<=options->min_coarse_matrix_size) {
164     out->coarsest_level=TRUE;
165 artak 2712 #ifdef MKL
166 artak 2711 out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
167     #pragma omp parallel for private(i) schedule(static)
168     for (i=0;i<out->A->len;++i) {
169     out->AOffset1->val[i]=out->A->val[i];
170     }
171 artak 2712 #else
172     #ifdef UMFPACK
173     #else
174 gross 2551 out->GS=Paso_Solver_getJacobi(A_p);
175     #endif
176     #endif
177     } else {
178     out->coarsest_level=FALSE;
179     out->GS=Paso_Solver_getJacobi(A_p);
180 artak 2442
181 gross 2551 /* identify independend set of rows/columns */
182     #pragma omp parallel for private(i) schedule(static)
183     for (i=0;i<n;++i) mis_marker[i]=-1;
184    
185     if (options->coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) {
186 artak 2652 Paso_Pattern_YS(A_p,mis_marker,options->coarsening_threshold);
187 gross 2551 }
188     else if (options->coarsening_method == PASO_RUGE_STUEBEN_COARSENING) {
189     Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold);
190     }
191     else if (options->coarsening_method == PASO_AGGREGATION_COARSENING) {
192     Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);
193     }
194     else {
195     /*Default coarseneing*/
196 artak 2686 Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold);
197 artak 2659 /*Paso_Pattern_YS(A_p,mis_marker,options->coarsening_threshold);*/
198 artak 2726 /*Paso_Pattern_greedy_diag(A_p,mis_marker,options->coarsening_threshold);*/
199     /*Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);*/
200 gross 2551 }
201 artak 2686
202     #pragma omp parallel for private(i) schedule(static)
203     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
204 artak 2726
205 artak 2686 out->n_F=Paso_Util_cumsum(n,counter);
206    
207 artak 2726 if (out->n_F==0) {
208 artak 2686 out->coarsest_level=TRUE;
209 artak 2726 level=0;
210 artak 2686 if (verbose) {
211 artak 2726 /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
212     printf("AMG coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
213 artak 2686 }
214 artak 2726 }
215     else if (out->n_F==n) {
216     out->coarsest_level=TRUE;
217     level=0;
218     if (verbose) {
219     /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
220     printf("AMG coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
221    
222     }
223 artak 2686 } else {
224 artak 2475
225 artak 2686 if (Paso_noError()) {
226    
227     /*#pragma omp parallel for private(i) schedule(static)
228     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
229     out->n_F=Paso_Util_cumsum(n,counter);
230     */
231     out->mask_F=MEMALLOC(n,index_t);
232     out->rows_in_F=MEMALLOC(out->n_F,index_t);
233     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
234     out->A_FF_pivot=NULL; /* later use for block size>3 */
235     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
236     /* creates an index for F from mask */
237     #pragma omp parallel for private(i) schedule(static)
238     for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
239     #pragma omp parallel for private(i) schedule(static)
240     for (i = 0; i < n; ++i) {
241     if (mis_marker[i]) {
242     out->rows_in_F[counter[i]]=i;
243     out->mask_F[i]=counter[i];
244     } else {
245     out->mask_F[i]=-1;
246     }
247     }
248    
249     /* Compute row-sum for getting rs(A_FF)^-1*/
250     #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
251     for (i = 0; i < out->n_F; ++i) {
252     S=0;
253     /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
254     for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
255     j=A_p->pattern->index[iPtr];
256     /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
257     if (mis_marker[j])
258     S+=A_p->val[iPtr];
259     }
260     /*printf("-> %e \n",S);*/
261     out->inv_A_FF[i]=1./S;
262     }
263 jfenwick 1851 }
264     }
265 artak 2686
266     /*check whether coarsening process actually makes sense to continue.
267     if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
268     if ((out->n_F*100/n)<30) {
269     level=1;
270 artak 1881 }
271 gross 2551
272 artak 2686 if ( Paso_noError()) {
273     /* if there are no nodes in the coarse level there is no more work to do */
274     out->n_C=n-out->n_F;
275    
276     /*if (out->n_F>500) */
277     out->rows_in_C=MEMALLOC(out->n_C,index_t);
278     out->mask_C=MEMALLOC(n,index_t);
279     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
280     /* creates an index for C from mask */
281     #pragma omp parallel for private(i) schedule(static)
282     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
283     Paso_Util_cumsum(n,counter);
284     #pragma omp parallel for private(i) schedule(static)
285     for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
286     #pragma omp parallel for private(i) schedule(static)
287     for (i = 0; i < n; ++i) {
288     if (! mis_marker[i]) {
289     out->rows_in_C[counter[i]]=i;
290     out->mask_C[i]=counter[i];
291     } else {
292     out->mask_C[i]=-1;
293     }
294     }
295     }
296     }
297     if ( Paso_noError()) {
298     /* get A_CF block: */
299     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
300     /* get A_FC block: */
301     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
302     /* get A_CC block: */
303     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
304    
305 gross 2551 }
306 artak 2686 if ( Paso_noError()) {
307     /*find the pattern of the schur complement with fill in*/
308     temp1=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
309     temp2=Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
310     schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
311     Paso_Pattern_free(temp1);
312     Paso_Pattern_free(temp2);
313 gross 2551 }
314 artak 2686 if ( Paso_noError()) {
315     /* copy values over*/
316     #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
317     for (i = 0; i < schur_withFillIn->numRows; ++i) {
318     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
319     j=schur_withFillIn->pattern->index[iPtr];
320     iPtr_s=schur->pattern->ptr[i];
321     index=&(schur->pattern->index[iPtr_s]);
322     where_p=(index_t*)bsearch(&j,
323     index,
324     schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
325     sizeof(index_t),
326     Paso_comparIndex);
327     if (where_p!=NULL) {
328     schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
329     }
330     }
331     }
332     Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
333     out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,level-1,options);
334     }
335     /* allocate work arrays for AMG application */
336     if (Paso_noError()) {
337     out->x_F=MEMALLOC(n_block*out->n_F,double);
338     out->b_F=MEMALLOC(n_block*out->n_F,double);
339     out->x_C=MEMALLOC(n_block*out->n_C,double);
340     out->b_C=MEMALLOC(n_block*out->n_C,double);
341    
342     if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
343     #pragma omp parallel for private(i) schedule(static)
344     for (i = 0; i < out->n_F; ++i) {
345     out->x_F[i]=0.;
346     out->b_F[i]=0.;
347     }
348     #pragma omp parallel for private(i) schedule(static)
349     for (i = 0; i < out->n_C; ++i) {
350     out->x_C[i]=0.;
351     out->b_C[i]=0.;
352     }
353     }
354     }
355     Paso_SparseMatrix_free(schur);
356     Paso_SparseMatrix_free(schur_withFillIn);
357     }
358 jfenwick 1851 }
359     }
360     TMPMEMFREE(mis_marker);
361     TMPMEMFREE(counter);
362 artak 2686
363 jfenwick 1851 if (Paso_noError()) {
364 artak 2524 if (verbose && level>0 && !out->coarsest_level) {
365 gross 2551 printf("AMG: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
366 jfenwick 1851 }
367     return out;
368     } else {
369     Paso_Solver_AMG_free(out);
370     return NULL;
371     }
372     }
373    
374     /**************************************************************/
375    
376     /* apply AMG precondition b-> x
377    
378     in fact it solves
379    
380 artak 2381 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
381 jfenwick 1851 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
382    
383     in the form
384    
385     b->[b_F,b_C]
386     x_F=invA_FF*b_F
387     b_C=b_C-A_CF*x_F
388     x_C=AMG(b_C)
389     b_F=b_F-A_FC*x_C
390     x_F=invA_FF*b_F
391     x<-[x_F,x_C]
392    
393     should be called within a parallel region
394     barrier synconization should be performed to make sure that the input vector available
395    
396     */
397    
398     void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
399 artak 2107 dim_t i;
400 artak 2507 double time0=0;
401 gross 2551 double *r=NULL, *x0=NULL;
402 artak 2509 bool_t verbose=0;
403 artak 2699 #ifdef UMFPACK
404     Paso_UMFPACK_Handler * ptr=NULL;
405 artak 2507 #endif
406 gross 2551 r=MEMALLOC(amg->n,double);
407     x0=MEMALLOC(amg->n,double);
408 artak 2507
409 artak 2524 if (amg->coarsest_level) {
410 artak 2507
411     time0=Paso_timer();
412 artak 2699 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
413 artak 2726 if (amg->n_F==0 || amg->n_F==amg->n) {
414 artak 2699 Paso_Solver_solveJacobi(amg->GS,x,b);
415     }
416     else {
417 artak 2712 #ifdef MKL
418     Paso_MKL1(amg->AOffset1,x,b,verbose);
419     #else
420     #ifdef UMFPACK
421 gross 2551 ptr=(Paso_UMFPACK_Handler *)(amg->solver);
422 artak 2652 Paso_UMFPACK1(&ptr,amg->A,x,b,verbose);
423 gross 2551 amg->solver=(void*) ptr;
424 artak 2712 #else
425     Paso_Solver_solveJacobi(amg->GS,x,b);
426 artak 2699 #endif
427 artak 2507 #endif
428 artak 2699 }
429 artak 2475 time0=Paso_timer()-time0;
430 artak 2652 if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
431 artak 2507
432 jfenwick 1851 } else {
433 artak 1902 /* presmoothing */
434 artak 2507 time0=Paso_timer();
435 artak 2112 Paso_Solver_solveJacobi(amg->GS,x,b);
436 artak 2507 time0=Paso_timer()-time0;
437     if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0);
438 artak 1931 /* end of presmoothing */
439 artak 1939
440 artak 2507
441     time0=Paso_timer();
442 artak 1890 #pragma omp parallel for private(i) schedule(static)
443     for (i=0;i<amg->n;++i) r[i]=b[i];
444    
445     /*r=b-Ax*/
446     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
447 artak 1954
448 artak 2726 /* r->[b_F,b_C] */
449 artak 1954 #pragma omp parallel for private(i) schedule(static)
450     for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]];
451    
452     #pragma omp parallel for private(i) schedule(static)
453     for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]];
454 artak 1890
455 jfenwick 1851 /* x_F=invA_FF*b_F */
456 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
457 artak 1890
458 jfenwick 1851 /* b_C=b_C-A_CF*x_F */
459     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
460 artak 1890
461 artak 2507 time0=Paso_timer()-time0;
462     if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
463    
464 jfenwick 1851 /* x_C=AMG(b_C) */
465     Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
466 artak 1890
467 artak 2507 time0=Paso_timer();
468 jfenwick 1851 /* b_F=b_F-A_FC*x_C */
469     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
470     /* x_F=invA_FF*b_F */
471 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
472 artak 2726
473 jfenwick 1851 /* x<-[x_F,x_C] */
474 artak 1954 #pragma omp parallel for private(i) schedule(static)
475     for (i=0;i<amg->n;++i) {
476     if (amg->mask_C[i]>-1) {
477 artak 2107 x[i]=amg->x_C[amg->mask_C[i]];
478 artak 1954 } else {
479 artak 2107 x[i]=amg->x_F[amg->mask_F[i]];
480 artak 1954 }
481 jfenwick 1851 }
482 artak 1954
483 artak 2507 time0=Paso_timer()-time0;
484     if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
485    
486 artak 1931 /*postsmoothing*/
487 artak 2507 time0=Paso_timer();
488 artak 1954 #pragma omp parallel for private(i) schedule(static)
489     for (i=0;i<amg->n;++i) r[i]=b[i];
490 artak 2107
491 artak 2442 /*r=b-Ax */
492 artak 1954 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
493 artak 2112 Paso_Solver_solveJacobi(amg->GS,x0,r);
494 artak 2107
495     #pragma omp parallel for private(i) schedule(static)
496     for (i=0;i<amg->n;++i) x[i]+=x0[i];
497    
498 artak 2507 time0=Paso_timer()-time0;
499     if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
500 artak 2509
501 artak 1931 /*end of postsmoothing*/
502    
503 jfenwick 1851 }
504 artak 1890 MEMFREE(r);
505 artak 1954 MEMFREE(x0);
506 gross 2551 return;
507 jfenwick 1851 }

  ViewVC Help
Powered by ViewVC 1.1.26