/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2520 - (hide annotations)
Mon Jul 6 03:13:11 2009 UTC (10 years, 7 months ago) by artak
File MIME type: text/plain
File size: 15793 byte(s)
AMG now takes as an input Paso_options for selecting coarsenening algorithm and treshold variable.
1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 artak 2475 /* Paso: AMG preconditioner */
18 jfenwick 1851
19     /**************************************************************/
20    
21     /* Author: artak@access.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "Solver.h"
27 artak 2475 #include "Options.h"
28 jfenwick 1851 #include "PasoUtil.h"
29 artak 1931 #include "UMFPACK.h"
30 artak 2507 #include "MKL.h"
31     #include "SystemMatrix.h"
32 phornby 1917 #include "Pattern_coupling.h"
33 jfenwick 1851
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38     void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
39     if (in!=NULL) {
40     Paso_Solver_AMG_free(in->AMG_of_Schur);
41 artak 2112 Paso_Solver_Jacobi_free(in->GS);
42 jfenwick 1851 MEMFREE(in->inv_A_FF);
43     MEMFREE(in->A_FF_pivot);
44     Paso_SparseMatrix_free(in->A_FC);
45     Paso_SparseMatrix_free(in->A_CF);
46     MEMFREE(in->rows_in_F);
47     MEMFREE(in->rows_in_C);
48     MEMFREE(in->mask_F);
49     MEMFREE(in->mask_C);
50     MEMFREE(in->x_F);
51     MEMFREE(in->b_F);
52     MEMFREE(in->x_C);
53     MEMFREE(in->b_C);
54     MEMFREE(in);
55     }
56     }
57    
58     /**************************************************************/
59    
60     /* constructs the block-block factorization of
61    
62     [ A_FF A_FC ]
63     A_p=
64     [ A_CF A_FF ]
65    
66     to
67    
68     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
69     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
70    
71    
72     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
73    
74     then AMG is applied to S again until S becomes empty
75    
76     */
77 artak 2520 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
78 jfenwick 1851 Paso_Solver_AMG* out=NULL;
79 artak 2520 bool_t verbose=options->verbose;
80 jfenwick 1851 dim_t n=A_p->numRows;
81     dim_t n_block=A_p->row_block_size;
82     index_t* mis_marker=NULL;
83 artak 1881 index_t* counter=NULL;
84     index_t iPtr,*index, *where_p, iPtr_s;
85 artak 2107 dim_t i,k,j;
86 jfenwick 1851 Paso_SparseMatrix * schur=NULL;
87 artak 1881 Paso_SparseMatrix * schur_withFillIn=NULL;
88 artak 2381 double S=0;
89 artak 2475
90 artak 2439 /*Make sure we have block sizes 1*/
91     A_p->pattern->input_block_size=A_p->col_block_size;
92     A_p->pattern->output_block_size=A_p->row_block_size;
93     A_p->pattern->block_size=A_p->block_size;
94    
95 jfenwick 1851 /* identify independend set of rows/columns */
96     mis_marker=TMPMEMALLOC(n,index_t);
97     counter=TMPMEMALLOC(n,index_t);
98     out=MEMALLOC(1,Paso_Solver_AMG);
99     out->AMG_of_Schur=NULL;
100     out->inv_A_FF=NULL;
101     out->A_FF_pivot=NULL;
102     out->A_FC=NULL;
103     out->A_CF=NULL;
104     out->rows_in_F=NULL;
105     out->rows_in_C=NULL;
106     out->mask_F=NULL;
107     out->mask_C=NULL;
108     out->x_F=NULL;
109     out->b_F=NULL;
110     out->x_C=NULL;
111     out->b_C=NULL;
112 artak 2107 out->GS=NULL;
113 artak 1890 out->A=Paso_SparseMatrix_getReference(A_p);
114 artak 2112 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
115     out->GS=Paso_Solver_getJacobi(A_p);
116 artak 2107 /*out->GS->sweeps=2;*/
117 artak 1890 out->level=level;
118 artak 2442
119     if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) || level==0 ) ) {
120 jfenwick 1851 /* identify independend set of rows/columns */
121     #pragma omp parallel for private(i) schedule(static)
122     for (i=0;i<n;++i) mis_marker[i]=-1;
123 artak 2475
124 artak 2520 if (options->coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) {
125     Paso_Pattern_coup(A_p,mis_marker,options->coarsening_threshold);
126 artak 2475 }
127 artak 2520 else if (options->coarsening_method == PASO_RUGE_STUEBEN_COARSENING) {
128     Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold);
129 artak 2475 }
130 artak 2520 else if (options->coarsening_method == PASO_AGGREGATION_COARSENING) {
131     Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);
132 artak 2475 }
133     else {
134     /*Default coarseneing*/
135 artak 2520 Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold);
136 artak 2475 }
137 artak 2509
138 artak 2475 if (Paso_noError()) {
139 jfenwick 1851 #pragma omp parallel for private(i) schedule(static)
140     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
141     out->n=n;
142     out->n_block=n_block;
143     out->n_F=Paso_Util_cumsum(n,counter);
144     out->mask_F=MEMALLOC(n,index_t);
145     out->rows_in_F=MEMALLOC(out->n_F,index_t);
146     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
147     out->A_FF_pivot=NULL; /* later use for block size>3 */
148     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
149     /* creates an index for F from mask */
150 artak 2107 #pragma omp parallel for private(i) schedule(static)
151 jfenwick 1851 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
152 artak 2107 #pragma omp parallel for private(i) schedule(static)
153 jfenwick 1851 for (i = 0; i < n; ++i) {
154     if (mis_marker[i]) {
155     out->rows_in_F[counter[i]]=i;
156     out->mask_F[i]=counter[i];
157     } else {
158     out->mask_F[i]=-1;
159     }
160     }
161 artak 2107
162     /* Compute row-sum for getting rs(A_FF)^-1*/
163 artak 2131 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
164 artak 1881 for (i = 0; i < out->n_F; ++i) {
165 artak 2107 S=0;
166 artak 1881 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
167 artak 1975 j=A_p->pattern->index[iPtr];
168     if (mis_marker[j])
169 artak 2107 S+=A_p->val[iPtr];
170 artak 1881 }
171 artak 2442 out->inv_A_FF[i]=1./S;
172 artak 1881 }
173 jfenwick 1851
174     if( Paso_noError()) {
175     /* if there are no nodes in the coarse level there is no more work to do */
176     out->n_C=n-out->n_F;
177 artak 2381 if (level>0 && out->n_C>500) {
178 artak 2280 /*if (out->n_F>500) {*/
179 jfenwick 1851 out->rows_in_C=MEMALLOC(out->n_C,index_t);
180     out->mask_C=MEMALLOC(n,index_t);
181     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
182     /* creates an index for C from mask */
183     #pragma omp parallel for private(i) schedule(static)
184     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
185     Paso_Util_cumsum(n,counter);
186 artak 2107 #pragma omp parallel for private(i) schedule(static)
187 jfenwick 1851 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
188 artak 2107 #pragma omp parallel for private(i) schedule(static)
189 jfenwick 1851 for (i = 0; i < n; ++i) {
190     if (! mis_marker[i]) {
191     out->rows_in_C[counter[i]]=i;
192     out->mask_C[i]=counter[i];
193     } else {
194     out->mask_C[i]=-1;
195     }
196     }
197     /* get A_CF block: */
198     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
199     if (Paso_noError()) {
200     /* get A_FC block: */
201     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
202 artak 1881 /* get A_CC block: */
203 jfenwick 1851 if (Paso_noError()) {
204     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
205 artak 2107 /*find the pattern of the schur complement with fill in*/
206 artak 2507
207 artak 1881 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
208     /* copy values over*/
209 artak 2107 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
210 artak 1881 for (i = 0; i < schur_withFillIn->numRows; ++i) {
211     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
212     j=schur_withFillIn->pattern->index[iPtr];
213 artak 2107 iPtr_s=schur->pattern->ptr[i];
214 artak 2507 schur_withFillIn->val[iPtr]=0.;
215 artak 2107 index=&(schur->pattern->index[iPtr_s]);
216     where_p=(index_t*)bsearch(&j,
217     index,
218     schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
219     sizeof(index_t),
220     Paso_comparIndex);
221     if (where_p!=NULL) {
222     schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
223 artak 1881 }
224     }
225 artak 2509 }
226 jfenwick 1851 if (Paso_noError()) {
227 artak 1881 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
228 artak 2520 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,level-1,options);
229 artak 2107 Paso_SparseMatrix_free(schur);
230 jfenwick 1851 }
231     /* allocate work arrays for AMG application */
232     if (Paso_noError()) {
233     out->x_F=MEMALLOC(n_block*out->n_F,double);
234     out->b_F=MEMALLOC(n_block*out->n_F,double);
235     out->x_C=MEMALLOC(n_block*out->n_C,double);
236     out->b_C=MEMALLOC(n_block*out->n_C,double);
237 artak 1890
238 jfenwick 1851 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
239 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
240     for (i = 0; i < out->n_F; ++i) {
241 jfenwick 1851 for (k=0; k<n_block;++k) {
242     out->x_F[i*n_block+k]=0.;
243     out->b_F[i*n_block+k]=0.;
244     }
245     }
246 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
247 jfenwick 1851 for (i = 0; i < out->n_C; ++i) {
248     for (k=0; k<n_block;++k) {
249     out->x_C[i*n_block+k]=0.;
250     out->b_C[i*n_block+k]=0.;
251     }
252     }
253     }
254     }
255     }
256     }
257     }
258     }
259     }
260     }
261     }
262     }
263     TMPMEMFREE(mis_marker);
264     TMPMEMFREE(counter);
265     if (Paso_noError()) {
266 artak 2442 if (verbose && level>0) {
267     printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,out->n_C);
268 jfenwick 1851 }
269     return out;
270     } else {
271     Paso_Solver_AMG_free(out);
272     return NULL;
273     }
274     }
275    
276     /**************************************************************/
277    
278     /* apply AMG precondition b-> x
279    
280     in fact it solves
281    
282 artak 2381 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
283 jfenwick 1851 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
284    
285     in the form
286    
287     b->[b_F,b_C]
288     x_F=invA_FF*b_F
289     b_C=b_C-A_CF*x_F
290     x_C=AMG(b_C)
291     b_F=b_F-A_FC*x_C
292     x_F=invA_FF*b_F
293     x<-[x_F,x_C]
294    
295     should be called within a parallel region
296     barrier synconization should be performed to make sure that the input vector available
297    
298     */
299    
300     void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
301 artak 2107 dim_t i;
302 artak 1890 double *r=MEMALLOC(amg->n,double);
303 artak 1954 double *x0=MEMALLOC(amg->n,double);
304 artak 2507 double time0=0;
305 artak 2509 bool_t verbose=0;
306 artak 2107
307 artak 2507 #ifdef MKL
308     Paso_SparseMatrix *temp=NULL;
309     #endif
310    
311    
312 artak 2381 if (amg->level==0 || amg->n_C<=500) {
313 artak 2507
314     time0=Paso_timer();
315     #ifdef MKL
316     temp=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, amg->A->pattern,1,1);
317     #pragma omp parallel for private(i) schedule(static)
318     for (i=0;i<amg->A->len;++i) {
319     temp->val[i]=amg->A->val[i];
320     }
321     Paso_MKL1(temp,x,b,0);
322     Paso_SparseMatrix_free(temp);
323     #else
324     #ifdef UMFPACK
325     Paso_UMFPACK1(amg->A,x,b,0);
326     #else
327     Paso_Solver_solveJacobi(amg->GS,x,b);
328     #endif
329     #endif
330    
331 artak 2475 time0=Paso_timer()-time0;
332 artak 2507 if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n\n\n",time0);
333    
334 jfenwick 1851 } else {
335 artak 1902 /* presmoothing */
336 artak 2507 time0=Paso_timer();
337 artak 2112 Paso_Solver_solveJacobi(amg->GS,x,b);
338 artak 2507 time0=Paso_timer()-time0;
339     if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0);
340 artak 1931 /* end of presmoothing */
341 artak 1939
342 artak 2507
343     time0=Paso_timer();
344 artak 1890 #pragma omp parallel for private(i) schedule(static)
345     for (i=0;i<amg->n;++i) r[i]=b[i];
346    
347     /*r=b-Ax*/
348     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
349 artak 1954
350 jfenwick 1851 /* b->[b_F,b_C] */
351 artak 1954 #pragma omp parallel for private(i) schedule(static)
352     for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]];
353    
354     #pragma omp parallel for private(i) schedule(static)
355     for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]];
356 artak 1890
357 jfenwick 1851 /* x_F=invA_FF*b_F */
358 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
359 artak 1890
360 jfenwick 1851 /* b_C=b_C-A_CF*x_F */
361     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
362 artak 1890
363 artak 2507 time0=Paso_timer()-time0;
364     if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
365    
366 jfenwick 1851 /* x_C=AMG(b_C) */
367     Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
368 artak 1890
369 artak 2507 time0=Paso_timer();
370 jfenwick 1851 /* b_F=b_F-A_FC*x_C */
371     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
372     /* x_F=invA_FF*b_F */
373 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
374 jfenwick 1851 /* x<-[x_F,x_C] */
375 artak 2113
376 artak 1954 #pragma omp parallel for private(i) schedule(static)
377     for (i=0;i<amg->n;++i) {
378     if (amg->mask_C[i]>-1) {
379 artak 2107 x[i]=amg->x_C[amg->mask_C[i]];
380 artak 1954 } else {
381 artak 2107 x[i]=amg->x_F[amg->mask_F[i]];
382 artak 1954 }
383 jfenwick 1851 }
384 artak 1954
385 artak 2507 time0=Paso_timer()-time0;
386     if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
387    
388 artak 1931 /*postsmoothing*/
389 artak 2507 time0=Paso_timer();
390 artak 1954 #pragma omp parallel for private(i) schedule(static)
391     for (i=0;i<amg->n;++i) r[i]=b[i];
392 artak 2107
393 artak 2442 /*r=b-Ax */
394 artak 1954 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
395 artak 2112 Paso_Solver_solveJacobi(amg->GS,x0,r);
396 artak 2107
397     #pragma omp parallel for private(i) schedule(static)
398     for (i=0;i<amg->n;++i) x[i]+=x0[i];
399    
400 artak 2507 time0=Paso_timer()-time0;
401     if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
402 artak 2509
403 artak 1931 /*end of postsmoothing*/
404    
405 jfenwick 1851 }
406 artak 1890 MEMFREE(r);
407 artak 1954 MEMFREE(x0);
408 artak 1931 return;
409 jfenwick 1851 }
410    
411     /*
412     * $Log$
413     *
414     */

  ViewVC Help
Powered by ViewVC 1.1.26