/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2439 - (hide annotations)
Tue May 26 04:34:21 2009 UTC (10 years, 5 months ago) by artak
File MIME type: text/plain
File size: 15935 byte(s)
AMG now works with blocksize 3. Logical block sizes replaced with real block size before starting AMG. But this happens only when we use AMG as a preconditioner.
1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AMG preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Author: artak@access.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "Solver.h"
27     #include "PasoUtil.h"
28 artak 1931 #include "UMFPACK.h"
29 phornby 1917 #include "Pattern_coupling.h"
30 jfenwick 1851
31     /**************************************************************/
32    
33     /* free all memory used by AMG */
34    
35     void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
36     if (in!=NULL) {
37     Paso_Solver_AMG_free(in->AMG_of_Schur);
38 artak 2112 Paso_Solver_Jacobi_free(in->GS);
39 jfenwick 1851 MEMFREE(in->inv_A_FF);
40     MEMFREE(in->A_FF_pivot);
41     Paso_SparseMatrix_free(in->A_FC);
42     Paso_SparseMatrix_free(in->A_CF);
43     MEMFREE(in->rows_in_F);
44     MEMFREE(in->rows_in_C);
45     MEMFREE(in->mask_F);
46     MEMFREE(in->mask_C);
47     MEMFREE(in->x_F);
48     MEMFREE(in->b_F);
49     MEMFREE(in->x_C);
50     MEMFREE(in->b_C);
51     MEMFREE(in);
52     }
53     }
54    
55     /**************************************************************/
56    
57     /* constructs the block-block factorization of
58    
59     [ A_FF A_FC ]
60     A_p=
61     [ A_CF A_FF ]
62    
63     to
64    
65     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
66     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
67    
68    
69     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
70    
71     then AMG is applied to S again until S becomes empty
72    
73     */
74 artak 2178 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,bool_t verbose,dim_t level, double couplingParam) {
75 jfenwick 1851 Paso_Solver_AMG* out=NULL;
76     dim_t n=A_p->numRows;
77     dim_t n_block=A_p->row_block_size;
78     index_t* mis_marker=NULL;
79 artak 1881 index_t* counter=NULL;
80     index_t iPtr,*index, *where_p, iPtr_s;
81 artak 2107 dim_t i,k,j;
82 jfenwick 1851 Paso_SparseMatrix * schur=NULL;
83 artak 1881 Paso_SparseMatrix * schur_withFillIn=NULL;
84 artak 2381 double S=0;
85 artak 1890
86 artak 2439 /*Make sure we have block sizes 1*/
87     A_p->pattern->input_block_size=A_p->col_block_size;
88     A_p->pattern->output_block_size=A_p->row_block_size;
89     A_p->pattern->block_size=A_p->block_size;
90    
91 jfenwick 1851 /* identify independend set of rows/columns */
92     mis_marker=TMPMEMALLOC(n,index_t);
93     counter=TMPMEMALLOC(n,index_t);
94     out=MEMALLOC(1,Paso_Solver_AMG);
95     out->AMG_of_Schur=NULL;
96     out->inv_A_FF=NULL;
97     out->A_FF_pivot=NULL;
98     out->A_FC=NULL;
99     out->A_CF=NULL;
100     out->rows_in_F=NULL;
101     out->rows_in_C=NULL;
102     out->mask_F=NULL;
103     out->mask_C=NULL;
104     out->x_F=NULL;
105     out->b_F=NULL;
106     out->x_C=NULL;
107     out->b_C=NULL;
108 artak 2107 out->GS=NULL;
109 artak 1890 out->A=Paso_SparseMatrix_getReference(A_p);
110 artak 2112 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
111     out->GS=Paso_Solver_getJacobi(A_p);
112 artak 2107 /*out->GS->sweeps=2;*/
113 artak 1890 out->level=level;
114 artak 2439
115 jfenwick 1851 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
116     /* identify independend set of rows/columns */
117     #pragma omp parallel for private(i) schedule(static)
118     for (i=0;i<n;++i) mis_marker[i]=-1;
119 artak 2439 Paso_Pattern_coup(A_p,mis_marker,couplingParam);
120     /*Paso_Pattern_RS(A_p,mis_marker,couplingParam);*/
121 artak 2381 /*Paso_Pattern_Aggregiation(A_p,mis_marker,couplingParam);*/
122 jfenwick 1851 if (Paso_noError()) {
123     #pragma omp parallel for private(i) schedule(static)
124     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
125     out->n=n;
126     out->n_block=n_block;
127     out->n_F=Paso_Util_cumsum(n,counter);
128     out->mask_F=MEMALLOC(n,index_t);
129     out->rows_in_F=MEMALLOC(out->n_F,index_t);
130     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
131     out->A_FF_pivot=NULL; /* later use for block size>3 */
132     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
133     /* creates an index for F from mask */
134 artak 2107 #pragma omp parallel for private(i) schedule(static)
135 jfenwick 1851 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
136 artak 2107 #pragma omp parallel for private(i) schedule(static)
137 jfenwick 1851 for (i = 0; i < n; ++i) {
138     if (mis_marker[i]) {
139     out->rows_in_F[counter[i]]=i;
140     out->mask_F[i]=counter[i];
141     } else {
142     out->mask_F[i]=-1;
143     }
144     }
145 artak 2107
146     /* Compute row-sum for getting rs(A_FF)^-1*/
147 artak 2131 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
148 artak 1881 for (i = 0; i < out->n_F; ++i) {
149 artak 2107 S=0;
150 artak 1881 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
151 artak 1975 j=A_p->pattern->index[iPtr];
152     if (mis_marker[j])
153 artak 2107 S+=A_p->val[iPtr];
154 artak 1881 }
155 artak 2107 out->inv_A_FF[i]=1./S;
156 artak 1881 }
157 jfenwick 1851
158     if( Paso_noError()) {
159     /* if there are no nodes in the coarse level there is no more work to do */
160     out->n_C=n-out->n_F;
161 artak 2381 if (level>0 && out->n_C>500) {
162 artak 2280 /*if (out->n_F>500) {*/
163 jfenwick 1851 out->rows_in_C=MEMALLOC(out->n_C,index_t);
164     out->mask_C=MEMALLOC(n,index_t);
165     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
166     /* creates an index for C from mask */
167     #pragma omp parallel for private(i) schedule(static)
168     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
169     Paso_Util_cumsum(n,counter);
170 artak 2107 #pragma omp parallel for private(i) schedule(static)
171 jfenwick 1851 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
172 artak 2107 #pragma omp parallel for private(i) schedule(static)
173 jfenwick 1851 for (i = 0; i < n; ++i) {
174     if (! mis_marker[i]) {
175     out->rows_in_C[counter[i]]=i;
176     out->mask_C[i]=counter[i];
177     } else {
178     out->mask_C[i]=-1;
179     }
180     }
181     /* get A_CF block: */
182     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
183     if (Paso_noError()) {
184     /* get A_FC block: */
185     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
186 artak 1881 /* get A_CC block: */
187 jfenwick 1851 if (Paso_noError()) {
188     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
189 artak 2107 /*find the pattern of the schur complement with fill in*/
190 artak 2112
191 artak 1881 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
192 artak 2107
193 artak 2280 /*fprintf(stderr,"Sparsity of Schure: %dx%d LEN %d Percentage %f\n",schur_withFillIn->pattern->numOutput,schur_withFillIn->pattern->numInput,schur_withFillIn->len,schur_withFillIn->len/(1.*schur_withFillIn->pattern->numOutput*schur_withFillIn->pattern->numInput));*/
194 artak 2107
195 artak 1881 /* copy values over*/
196 artak 2107 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
197 artak 1881 for (i = 0; i < schur_withFillIn->numRows; ++i) {
198     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
199     j=schur_withFillIn->pattern->index[iPtr];
200 artak 2107 iPtr_s=schur->pattern->ptr[i];
201 artak 1881 schur_withFillIn->val[iPtr]=0.;
202 artak 2107 index=&(schur->pattern->index[iPtr_s]);
203     where_p=(index_t*)bsearch(&j,
204     index,
205     schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
206     sizeof(index_t),
207     Paso_comparIndex);
208     if (where_p!=NULL) {
209     schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
210 artak 1881 }
211     }
212     }
213 artak 2107
214 jfenwick 1851 if (Paso_noError()) {
215 artak 1881 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
216 artak 2353 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,verbose,level-1,couplingParam);
217 artak 2107 Paso_SparseMatrix_free(schur);
218 jfenwick 1851 }
219     /* allocate work arrays for AMG application */
220     if (Paso_noError()) {
221     out->x_F=MEMALLOC(n_block*out->n_F,double);
222     out->b_F=MEMALLOC(n_block*out->n_F,double);
223     out->x_C=MEMALLOC(n_block*out->n_C,double);
224     out->b_C=MEMALLOC(n_block*out->n_C,double);
225 artak 1890
226 jfenwick 1851 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
227 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
228     for (i = 0; i < out->n_F; ++i) {
229 jfenwick 1851 for (k=0; k<n_block;++k) {
230     out->x_F[i*n_block+k]=0.;
231     out->b_F[i*n_block+k]=0.;
232     }
233     }
234 artak 2107 #pragma omp parallel for private(i,k) schedule(static)
235 jfenwick 1851 for (i = 0; i < out->n_C; ++i) {
236     for (k=0; k<n_block;++k) {
237     out->x_C[i*n_block+k]=0.;
238     out->b_C[i*n_block+k]=0.;
239     }
240     }
241     }
242     }
243     }
244     }
245     }
246     }
247     }
248     }
249     }
250     }
251     TMPMEMFREE(mis_marker);
252     TMPMEMFREE(counter);
253     if (Paso_noError()) {
254     if (verbose) {
255     printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
256 artak 2381 /*if (level>0 && out->n_F>500) {*/
257 artak 2280 /* if (out->n_F<500) {*/
258 artak 2381 /* printf("timing: AMG: MIS/reordering/elemination : %e/%e\n",time0,time1);
259 jfenwick 1851 } else {
260     printf("timing: AMG: MIS: %e\n",time2);
261 artak 2381 }*/
262 jfenwick 1851 }
263     return out;
264     } else {
265     Paso_Solver_AMG_free(out);
266     return NULL;
267     }
268     }
269    
270     /**************************************************************/
271    
272     /* apply AMG precondition b-> x
273    
274     in fact it solves
275    
276 artak 2381 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
277 jfenwick 1851 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
278    
279     in the form
280    
281     b->[b_F,b_C]
282     x_F=invA_FF*b_F
283     b_C=b_C-A_CF*x_F
284     x_C=AMG(b_C)
285     b_F=b_F-A_FC*x_C
286     x_F=invA_FF*b_F
287     x<-[x_F,x_C]
288    
289     should be called within a parallel region
290     barrier synconization should be performed to make sure that the input vector available
291    
292     */
293    
294     void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
295 artak 2107 dim_t i;
296 artak 1890 double *r=MEMALLOC(amg->n,double);
297 artak 1954 double *x0=MEMALLOC(amg->n,double);
298 artak 2381 /*double time0=0;*/
299 artak 2107
300 artak 2381 if (amg->level==0 || amg->n_C<=500) {
301 artak 2280 /*if (amg->n_F<=500) {*/
302 artak 2381 /*time0=Paso_timer();*/
303 artak 2107
304 artak 2112 Paso_Solver_solveJacobi(amg->GS,x,b);
305 artak 2107
306     /* #pragma omp parallel for private(i) schedule(static)
307     for (i=0;i<amg->n;++i) r[i]=b[i];
308     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
309     Paso_Solver_solveGS(amg->GS,x0,r);
310     #pragma omp parallel for private(i) schedule(static)
311     for (i=0;i<amg->n;++i) {
312     x[i]+=x0[i];
313     }
314     */
315 artak 2112 /*Paso_UMFPACK1(amg->A,x,b,0);*/
316 artak 2381
317     /*time0=Paso_timer()-time0;
318     fprintf(stderr,"timing: DIRECT SOLVER: %e/\n",time0);*/
319 jfenwick 1851 } else {
320 artak 1902 /* presmoothing */
321 artak 2112 Paso_Solver_solveJacobi(amg->GS,x,b);
322 artak 2107 /*
323     #pragma omp parallel for private(i) schedule(static)
324     for (i=0;i<amg->n;++i) r[i]=b[i];
325    
326     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
327     Paso_Solver_solveGS(amg->GS,x0,r);
328    
329     #pragma omp parallel for private(i) schedule(static)
330     for (i=0;i<amg->n;++i) {
331     x[i]+=x0[i];
332     }
333     */
334 artak 1931 /* end of presmoothing */
335 artak 1939
336 artak 1890 #pragma omp parallel for private(i) schedule(static)
337     for (i=0;i<amg->n;++i) r[i]=b[i];
338    
339     /*r=b-Ax*/
340     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
341 artak 1954
342 jfenwick 1851 /* b->[b_F,b_C] */
343 artak 1954 #pragma omp parallel for private(i) schedule(static)
344     for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]];
345    
346     #pragma omp parallel for private(i) schedule(static)
347     for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]];
348 artak 1890
349 jfenwick 1851 /* x_F=invA_FF*b_F */
350 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
351 artak 1890
352 jfenwick 1851 /* b_C=b_C-A_CF*x_F */
353     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
354 artak 1890
355 jfenwick 1851 /* x_C=AMG(b_C) */
356     Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
357 artak 1890
358 jfenwick 1851 /* b_F=b_F-A_FC*x_C */
359     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
360     /* x_F=invA_FF*b_F */
361 artak 1954 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
362 jfenwick 1851 /* x<-[x_F,x_C] */
363 artak 2113
364 artak 1954 #pragma omp parallel for private(i) schedule(static)
365     for (i=0;i<amg->n;++i) {
366     if (amg->mask_C[i]>-1) {
367 artak 2107 x[i]=amg->x_C[amg->mask_C[i]];
368 artak 1954 } else {
369 artak 2107 x[i]=amg->x_F[amg->mask_F[i]];
370 artak 1954 }
371 jfenwick 1851 }
372 artak 1954
373 artak 1931 /*postsmoothing*/
374 artak 2107
375 artak 1954 #pragma omp parallel for private(i) schedule(static)
376     for (i=0;i<amg->n;++i) r[i]=b[i];
377 artak 2107
378 artak 1954 /*r=b-Ax*/
379     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
380 artak 2112 Paso_Solver_solveJacobi(amg->GS,x0,r);
381 artak 2107
382     #pragma omp parallel for private(i) schedule(static)
383     for (i=0;i<amg->n;++i) x[i]+=x0[i];
384    
385    
386     /*#pragma omp parallel for private(i) schedule(static)
387     for (i=0;i<amg->n;++i) r[i]=b[i];
388    
389     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
390 artak 1954 Paso_Solver_solveGS(amg->GS,x0,r);
391    
392     #pragma omp parallel for private(i) schedule(static)
393     for (i=0;i<amg->n;++i) {
394     x[i]+=x0[i];
395     }
396 artak 2107 */
397 artak 1931 /*end of postsmoothing*/
398    
399 jfenwick 1851 }
400 artak 1890 MEMFREE(r);
401 artak 1954 MEMFREE(x0);
402 artak 1931 return;
403 jfenwick 1851 }
404    
405     /*
406     * $Log$
407     *
408     */

  ViewVC Help
Powered by ViewVC 1.1.26