/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2475 - (hide annotations)
Wed Jun 17 01:48:46 2009 UTC (10 years, 8 months ago) by artak
File MIME type: text/plain
File size: 16262 byte(s)
AMG now takes into account new SolverOptions class, namely one can select coarsening algorithms from python. Not all options are included into AMG, this will be implemented later on.
1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 artak 2475 /* Paso: AMG preconditioner */
18 jfenwick 1851
19     /**************************************************************/
20    
21     /* Author: artak@access.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "Solver.h"
27 artak 2475 #include "Options.h"
28 jfenwick 1851 #include "PasoUtil.h"
29 artak 1931 #include "UMFPACK.h"
30 phornby 1917 #include "Pattern_coupling.h"
31 jfenwick 1851
32     /**************************************************************/
33    
34     /* free all memory used by AMG */
35    
36     void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
37     if (in!=NULL) {
38     Paso_Solver_AMG_free(in->AMG_of_Schur);
39 artak 2112 Paso_Solver_Jacobi_free(in->GS);
40 jfenwick 1851 MEMFREE(in->inv_A_FF);
41     MEMFREE(in->A_FF_pivot);
42     Paso_SparseMatrix_free(in->A_FC);
43     Paso_SparseMatrix_free(in->A_CF);
44     MEMFREE(in->rows_in_F);
45     MEMFREE(in->rows_in_C);
46     MEMFREE(in->mask_F);
47     MEMFREE(in->mask_C);
48     MEMFREE(in->x_F);
49     MEMFREE(in->b_F);
50     MEMFREE(in->x_C);
51     MEMFREE(in->b_C);
52     MEMFREE(in);
53     }
54     }
55    
56     /**************************************************************/
57    
58     /* constructs the block-block factorization of
59    
60     [ A_FF A_FC ]
61     A_p=
62     [ A_CF A_FF ]
63    
64     to
65    
66     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
67     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
68    
69    
70     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
71    
72     then AMG is applied to S again until S becomes empty
73    
74     */
75 artak 2475 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,bool_t verbose,dim_t level, double coarsening_threshold, dim_t coarsening_method) {
76 jfenwick 1851 Paso_Solver_AMG* out=NULL;
77     dim_t n=A_p->numRows;
78     dim_t n_block=A_p->row_block_size;
79     index_t* mis_marker=NULL;
80 artak 1881 index_t* counter=NULL;
81     index_t iPtr,*index, *where_p, iPtr_s;
82 artak 2107 dim_t i,k,j;
83 jfenwick 1851 Paso_SparseMatrix * schur=NULL;
84 artak 1881 Paso_SparseMatrix * schur_withFillIn=NULL;
85 artak 2381 double S=0;
86 artak 2475
87 artak 2439 /*Make sure we have block sizes 1*/
88     A_p->pattern->input_block_size=A_p->col_block_size;
89     A_p->pattern->output_block_size=A_p->row_block_size;
90     A_p->pattern->block_size=A_p->block_size;
91    
92 jfenwick 1851 /* identify independend set of rows/columns */
93     mis_marker=TMPMEMALLOC(n,index_t);
94     counter=TMPMEMALLOC(n,index_t);
95     out=MEMALLOC(1,Paso_Solver_AMG);
96     out->AMG_of_Schur=NULL;
97     out->inv_A_FF=NULL;
98     out->A_FF_pivot=NULL;
99     out->A_FC=NULL;
100     out->A_CF=NULL;
101     out->rows_in_F=NULL;
102     out->rows_in_C=NULL;
103     out->mask_F=NULL;
104     out->mask_C=NULL;
105     out->x_F=NULL;
106     out->b_F=NULL;
107     out->x_C=NULL;
108     out->b_C=NULL;
109 artak 2107 out->GS=NULL;
110 artak 1890 out->A=Paso_SparseMatrix_getReference(A_p);
111 artak 2112 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
112     out->GS=Paso_Solver_getJacobi(A_p);
113 artak 2107 /*out->GS->sweeps=2;*/
114 artak 1890 out->level=level;
115 artak 2442
116     if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) || level==0 ) ) {
117 jfenwick 1851 /* identify independend set of rows/columns */
118     #pragma omp parallel for private(i) schedule(static)
119     for (i=0;i<n;++i) mis_marker[i]=-1;
120 artak 2475
121     /*Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);*/
122     /*fprintf(stderr,"\n Coarsening method == %d, %d \n",coarsening_method,PASO_RUGE_STUEBEN_COARSENING);*/
123     if (coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) {
124     Paso_Pattern_coup(A_p,mis_marker,coarsening_threshold);
125     }
126     else if (coarsening_method == PASO_RUGE_STUEBEN_COARSENING) {
127     Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);
128     }
129     else if (coarsening_method == PASO_AGGREGATION_COARSENING) {
130     Paso_Pattern_Aggregiation(A_p,mis_marker,coarsening_threshold);
131     }
132     else {
133     /*Default coarseneing*/
134     Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);
135     }
136    
137     if (Paso_noError()) {
138 jfenwick 1851 #pragma omp parallel for private(i) schedule(static)
139     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
140     out->n=n;
141     out->n_block=n_block;
142     out->n_F=Paso_Util_cumsum(n,counter);
143     out->mask_F=MEMALLOC(n,index_t);
144     out->rows_in_F=MEMALLOC(out->n_F,index_t);
145     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
146     out->A_FF_pivot=NULL; /* later use for block size>3 */
147     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
148     /* creates an index for F from mask */
149 artak 2107 #pragma omp parallel for private(i) schedule(static)
150 jfenwick 1851 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
151 artak 2107 #pragma omp parallel for private(i) schedule(static)
152 jfenwick 1851 for (i = 0; i < n; ++i) {
153     if (mis_marker[i]) {
154     out->rows_in_F[counter[i]]=i;
155     out->mask_F[i]=counter[i];
156     } else {
157     out->mask_F[i]=-1;
158     }
159     }
160 artak 2107
161     /* Compute row-sum for getting rs(A_FF)^-1*/
162 artak 2131 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
163 artak 1881 for (i = 0; i < out->n_F; ++i) {
164 artak 2107 S=0;
165 artak 1881 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
166 artak 1975 j=A_p->pattern->index[iPtr];
167     if (mis_marker[j])
168 artak 2107 S+=A_p->val[iPtr];
169 artak 1881 }
170 artak 2442 out->inv_A_FF[i]=1./S;
171 artak 1881 }
172 jfenwick 1851
173     if( Paso_noError()) {
174     /* if there are no nodes in the coarse level there is no more work to do */
175     out->n_C=n-out->n_F;
176 artak 2381 if (level>0 && out->n_C>500) {
177 artak 2280 /*if (out->n_F>500) {*/
178 jfenwick 1851 out->rows_in_C=MEMALLOC(out->n_C,index_t);
179     out->mask_C=MEMALLOC(n,index_t);
180     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
181     /* creates an index for C from mask */
182     #pragma omp parallel for private(i) schedule(static)
183     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
184     Paso_Util_cumsum(n,counter);
185 artak 2107 #pragma omp parallel for private(i) schedule(static)
186 jfenwick 1851 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
187 artak 2107 #pragma omp parallel for private(i) schedule(static)
188 jfenwick 1851 for (i = 0; i < n; ++i) {
189     if (! mis_marker[i]) {
190     out->rows_in_C[counter[i]]=i;
191     out->mask_C[i]=counter[i];
192     } else {
193     out->mask_C[i]=-1;
194     }
195     }
196     /* get A_CF block: */
197     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
198     if (Paso_noError()) {
199     /* get A_FC block: */
200     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
201 artak 1881 /* get A_CC block: */
202 jfenwick 1851 if (Paso_noError()) {
203     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
204 artak 2107 /*find the pattern of the schur complement with fill in*/
205 artak 1881 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
206 artak 2475
207 artak 1881 /* copy values over*/
208 artak 2107 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
209 artak 1881 for (i = 0; i < schur_withFillIn->numRows; ++i) {
210     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
211     j=schur_withFillIn->pattern->index[iPtr];
212 artak 2107 iPtr_s=schur->pattern->ptr[i];
213 artak 2442 /*schur_withFillIn->val[iPtr]=0.;*/
214 artak 2107 index=&(schur->pattern->index[iPtr_s]);
215     where_p=(index_t*)bsearch(&j,
216     index,
217     schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
218     sizeof(index_t),
219     Paso_comparIndex);
220     if (where_p!=NULL) {
221     schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
222 artak 1881 }
223     }
224     }
225 artak 2107
226 jfenwick 1851 if (Paso_noError()) {
227 artak 1881 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
228 artak 2475 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,verbose,level-1,coarsening_threshold,coarsening_method);
229 artak 2107 Paso_SparseMatrix_free(schur);
230 jfenwick 1851 }
231     /* allocate work arrays for AMG application */
232     if (Paso_noError()) {
233     out->x_F=MEMALLOC(n_block*out->n_F,double);
234     out->b_F=MEMALLOC(n_block*out->n_F,double);
235     out->x_C=MEMALLOC(n_block*out->n_C,double);
236     out->b_C=MEMALLOC(n_block*out->n_C,double);
237 artak 1890
238 jfenwick 1851 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
239 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
240     for (i = 0; i < out->n_F; ++i) {
241 jfenwick 1851 for (k=0; k<n_block;++k) {
242     out->x_F[i*n_block+k]=0.;
243     out->b_F[i*n_block+k]=0.;
244     }
245     }
246 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
247 jfenwick 1851 for (i = 0; i < out->n_C; ++i) {
248     for (k=0; k<n_block;++k) {
249     out->x_C[i*n_block+k]=0.;
250     out->b_C[i*n_block+k]=0.;
251     }
252     }
253     }
254     }
255     }
256     }
257     }
258     }
259     }
260     }
261     }
262     }
263     TMPMEMFREE(mis_marker);
264     TMPMEMFREE(counter);
265     if (Paso_noError()) {
266 artak 2442 if (verbose && level>0) {
267     printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,out->n_C);
268 artak 2381 /*if (level>0 && out->n_F>500) {*/
269 artak 2280 /* if (out->n_F<500) {*/
270 artak 2381 /* printf("timing: AMG: MIS/reordering/elemination : %e/%e\n",time0,time1);
271 jfenwick 1851 } else {
272     printf("timing: AMG: MIS: %e\n",time2);
273 artak 2381 }*/
274 jfenwick 1851 }
275     return out;
276     } else {
277     Paso_Solver_AMG_free(out);
278     return NULL;
279     }
280     }
281    
282     /**************************************************************/
283    
284     /* apply AMG precondition b-> x
285    
286     in fact it solves
287    
288 artak 2381 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
289 jfenwick 1851 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
290    
291     in the form
292    
293     b->[b_F,b_C]
294     x_F=invA_FF*b_F
295     b_C=b_C-A_CF*x_F
296     x_C=AMG(b_C)
297     b_F=b_F-A_FC*x_C
298     x_F=invA_FF*b_F
299     x<-[x_F,x_C]
300    
301     should be called within a parallel region
302     barrier synconization should be performed to make sure that the input vector available
303    
304     */
305    
306     void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
307 artak 2107 dim_t i;
308 artak 1890 double *r=MEMALLOC(amg->n,double);
309 artak 1954 double *x0=MEMALLOC(amg->n,double);
310 artak 2381 /*double time0=0;*/
311 artak 2107
312 artak 2381 if (amg->level==0 || amg->n_C<=500) {
313 artak 2280 /*if (amg->n_F<=500) {*/
314 artak 2381 /*time0=Paso_timer();*/
315 artak 2107
316 artak 2475 /*Paso_Solver_solveJacobi(amg->GS,x,b);*/
317 artak 2107
318     /* #pragma omp parallel for private(i) schedule(static)
319     for (i=0;i<amg->n;++i) r[i]=b[i];
320     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
321     Paso_Solver_solveGS(amg->GS,x0,r);
322     #pragma omp parallel for private(i) schedule(static)
323     for (i=0;i<amg->n;++i) {
324     x[i]+=x0[i];
325     }
326     */
327 artak 2475 Paso_UMFPACK1(amg->A,x,b,0);
328 artak 2381
329 artak 2475 /*
330     time0=Paso_timer()-time0;
331     fprintf(stderr,"timing: DIRECT SOLVER: %e/\n",time0);
332     */
333 jfenwick 1851 } else {
334 artak 1902 /* presmoothing */
335 artak 2112 Paso_Solver_solveJacobi(amg->GS,x,b);
336 artak 2475
337     /* #pragma omp parallel for private(i) schedule(static)
338 artak 2107 for (i=0;i<amg->n;++i) r[i]=b[i];
339    
340     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
341 artak 2475 Paso_Solver_solveJacobi(amg->GS,x0,r);
342 artak 2107
343     #pragma omp parallel for private(i) schedule(static)
344     for (i=0;i<amg->n;++i) {
345     x[i]+=x0[i];
346     }
347 artak 2475 */
348 artak 1931 /* end of presmoothing */
349 artak 1939
350 artak 1890 #pragma omp parallel for private(i) schedule(static)
351     for (i=0;i<amg->n;++i) r[i]=b[i];
352    
353     /*r=b-Ax*/
354     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
355 artak 1954
356 jfenwick 1851 /* b->[b_F,b_C] */
357 artak 1954 #pragma omp parallel for private(i) schedule(static)
358     for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]];
359    
360     #pragma omp parallel for private(i) schedule(static)
361     for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]];
362 artak 1890
363 jfenwick 1851 /* x_F=invA_FF*b_F */
364 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
365 artak 1890
366 jfenwick 1851 /* b_C=b_C-A_CF*x_F */
367     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
368 artak 1890
369 jfenwick 1851 /* x_C=AMG(b_C) */
370     Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
371 artak 1890
372 jfenwick 1851 /* b_F=b_F-A_FC*x_C */
373     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
374     /* x_F=invA_FF*b_F */
375 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
376 jfenwick 1851 /* x<-[x_F,x_C] */
377 artak 2113
378 artak 1954 #pragma omp parallel for private(i) schedule(static)
379     for (i=0;i<amg->n;++i) {
380     if (amg->mask_C[i]>-1) {
381 artak 2107 x[i]=amg->x_C[amg->mask_C[i]];
382 artak 1954 } else {
383 artak 2107 x[i]=amg->x_F[amg->mask_F[i]];
384 artak 1954 }
385 jfenwick 1851 }
386 artak 1954
387 artak 1931 /*postsmoothing*/
388 artak 2107
389 artak 1954 #pragma omp parallel for private(i) schedule(static)
390     for (i=0;i<amg->n;++i) r[i]=b[i];
391 artak 2107
392 artak 2442 /*r=b-Ax */
393 artak 1954 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
394 artak 2112 Paso_Solver_solveJacobi(amg->GS,x0,r);
395 artak 2107
396     #pragma omp parallel for private(i) schedule(static)
397     for (i=0;i<amg->n;++i) x[i]+=x0[i];
398    
399 artak 2475 /*
400     #pragma omp parallel for private(i) schedule(static)
401     for (i=0;i<amg->n;++i) r[i]=b[i];
402 artak 2107
403     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
404 artak 2442 Paso_Solver_solveJacobi(amg->GS,x0,r);
405 artak 1954
406     #pragma omp parallel for private(i) schedule(static)
407     for (i=0;i<amg->n;++i) {
408     x[i]+=x0[i];
409 artak 2475 }
410     */
411 artak 1931 /*end of postsmoothing*/
412    
413 jfenwick 1851 }
414 artak 1890 MEMFREE(r);
415 artak 1954 MEMFREE(x0);
416 artak 1931 return;
417 jfenwick 1851 }
418    
419     /*
420     * $Log$
421     *
422     */

  ViewVC Help
Powered by ViewVC 1.1.26