/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1887 - (hide annotations)
Wed Oct 15 03:26:25 2008 UTC (11 years, 4 months ago) by ksteube
File MIME type: text/plain
File size: 16383 byte(s)
Fixed two typos that stopped the test suite from running.

Also, gcc 4.3.2 issued several warnings not seen before:
ignoring the return value of fscanf and using the wrong format
with printf.

1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AMG preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Author: artak@access.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26     #include "Solver.h"
27     #include "PasoUtil.h"
28    
29     /**************************************************************/
30    
31     /* free all memory used by AMG */
32    
33     void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
34     if (in!=NULL) {
35     Paso_Solver_AMG_free(in->AMG_of_Schur);
36     MEMFREE(in->inv_A_FF);
37     MEMFREE(in->A_FF_pivot);
38     Paso_SparseMatrix_free(in->A_FC);
39     Paso_SparseMatrix_free(in->A_CF);
40     MEMFREE(in->rows_in_F);
41     MEMFREE(in->rows_in_C);
42     MEMFREE(in->mask_F);
43     MEMFREE(in->mask_C);
44     MEMFREE(in->x_F);
45     MEMFREE(in->b_F);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48     MEMFREE(in);
49     }
50     }
51    
52     /**************************************************************/
53    
54     /* constructs the block-block factorization of
55    
56     [ A_FF A_FC ]
57     A_p=
58     [ A_CF A_FF ]
59    
60     to
61    
62     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
63     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
64    
65    
66     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
67    
68     then AMG is applied to S again until S becomes empty
69    
70     */
71     Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,bool_t verbose) {
72     Paso_Solver_AMG* out=NULL;
73     dim_t n=A_p->numRows;
74     dim_t n_block=A_p->row_block_size;
75     index_t* mis_marker=NULL;
76 artak 1881 index_t* counter=NULL;
77     double *rs=NULL;
78     index_t iPtr,*index, *where_p, iPtr_s;
79     dim_t i,k,j,j0;
80 jfenwick 1851 Paso_SparseMatrix * schur=NULL;
81 artak 1881 Paso_SparseMatrix * schur_withFillIn=NULL;
82 ksteube 1887 double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
83 artak 1881 schur_withFillIn=MEMALLOC(1,Paso_SparseMatrix);
84 jfenwick 1851
85    
86     /* identify independend set of rows/columns */
87     mis_marker=TMPMEMALLOC(n,index_t);
88     counter=TMPMEMALLOC(n,index_t);
89 artak 1881 rs=TMPMEMALLOC(n,double);
90 jfenwick 1851 out=MEMALLOC(1,Paso_Solver_AMG);
91     out->AMG_of_Schur=NULL;
92     out->inv_A_FF=NULL;
93     out->A_FF_pivot=NULL;
94     out->A_FC=NULL;
95     out->A_CF=NULL;
96     out->rows_in_F=NULL;
97     out->rows_in_C=NULL;
98     out->mask_F=NULL;
99     out->mask_C=NULL;
100     out->x_F=NULL;
101     out->b_F=NULL;
102     out->x_C=NULL;
103     out->b_C=NULL;
104 artak 1881
105     /* fprintf(stderr,"START OF MATRIX \n\n");
106     for (i = 0; i < A_p->numRows; ++i) {
107     for (iPtr=A_p->pattern->ptr[i];iPtr<A_p->pattern->ptr[i + 1]; ++iPtr) {
108     j=A_p->pattern->index[iPtr];
109     fprintf(stderr,"A[%d,%d]=%.2f ",i,j,A_p->val[iPtr]);
110     }
111     fprintf(stderr,"\n");
112     }
113     fprintf(stderr,"END OF MATRIX \n\n");
114     */
115 jfenwick 1851 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
116     /* identify independend set of rows/columns */
117     time0=Paso_timer();
118     #pragma omp parallel for private(i) schedule(static)
119     for (i=0;i<n;++i) mis_marker[i]=-1;
120 artak 1881 Paso_Pattern_coup(A_p,mis_marker);
121 jfenwick 1851 time2=Paso_timer()-time0;
122     if (Paso_noError()) {
123     #pragma omp parallel for private(i) schedule(static)
124     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
125     out->n=n;
126     out->n_block=n_block;
127     out->n_F=Paso_Util_cumsum(n,counter);
128     out->mask_F=MEMALLOC(n,index_t);
129     out->rows_in_F=MEMALLOC(out->n_F,index_t);
130     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
131     out->A_FF_pivot=NULL; /* later use for block size>3 */
132     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
133     #pragma omp parallel
134     {
135     /* creates an index for F from mask */
136     #pragma omp for private(i) schedule(static)
137     for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
138     #pragma omp for private(i) schedule(static)
139     for (i = 0; i < n; ++i) {
140     if (mis_marker[i]) {
141     out->rows_in_F[counter[i]]=i;
142     out->mask_F[i]=counter[i];
143     } else {
144     out->mask_F[i]=-1;
145     }
146     }
147 artak 1881 /* Compute row-sum for getting rs(A_FF)*/
148     #pragma omp for private(i,iPtr) schedule(static)
149     for (i = 0; i < out->n_F; ++i) {
150     rs[i]=0;
151     for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
152     rs[i]+=A_p->val[iPtr];
153     }
154     }
155    
156 jfenwick 1851 #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
157     for (i = 0; i < out->n_F; i++) {
158     /* find main diagonal */
159     iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
160     index=&(A_p->pattern->index[iPtr]);
161     where_p=(index_t*)bsearch(&out->rows_in_F[i],
162     index,
163     A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
164     sizeof(index_t),
165     Paso_comparIndex);
166     if (where_p==NULL) {
167     Paso_setError(VALUE_ERROR, "Paso_Solver_getAMG: main diagonal element missing.");
168     } else {
169     iPtr+=(index_t)(where_p-index);
170     /* get inverse of A_FF block: */
171 artak 1881 if (ABS(rs[i])>0.) {
172     out->inv_A_FF[i]=1./rs[i];
173     } else {
174 jfenwick 1851 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getAMG: Break-down in AMG decomposition: non-regular main diagonal block.");
175 artak 1881 }
176     }
177 jfenwick 1851 }
178     } /* end parallel region */
179    
180     if( Paso_noError()) {
181     /* if there are no nodes in the coarse level there is no more work to do */
182     out->n_C=n-out->n_F;
183     if (out->n_C>0) {
184     out->rows_in_C=MEMALLOC(out->n_C,index_t);
185     out->mask_C=MEMALLOC(n,index_t);
186     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
187     /* creates an index for C from mask */
188     #pragma omp parallel for private(i) schedule(static)
189     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
190     Paso_Util_cumsum(n,counter);
191     #pragma omp parallel
192     {
193     #pragma omp for private(i) schedule(static)
194     for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
195     #pragma omp for private(i) schedule(static)
196     for (i = 0; i < n; ++i) {
197     if (! mis_marker[i]) {
198     out->rows_in_C[counter[i]]=i;
199     out->mask_C[i]=counter[i];
200     } else {
201     out->mask_C[i]=-1;
202     }
203     }
204     } /* end parallel region */
205     /* get A_CF block: */
206     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
207     if (Paso_noError()) {
208     /* get A_FC block: */
209     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
210 artak 1881 /* get A_CC block: */
211 jfenwick 1851 if (Paso_noError()) {
212     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
213 artak 1881
214     /*find the pattern of the schur complement with fill in*/
215     schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
216    
217     /* copy values over*/
218     #pragma omp for private(i,iPtr,iPtr_s,j,j0) schedule(static)
219     for (i = 0; i < schur_withFillIn->numRows; ++i) {
220     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
221     j=schur_withFillIn->pattern->index[iPtr];
222     schur_withFillIn->val[iPtr]=0.;
223     for (iPtr_s=schur->pattern->ptr[i];iPtr_s<schur->pattern->ptr[i + 1]; ++iPtr_s){
224     j0=schur->pattern->index[iPtr_s];
225     if (j==j0) {
226     schur_withFillIn->val[iPtr]=schur->val[iPtr_s];
227     break;
228     }
229     }
230     }
231     }
232    
233     /* for (i = 0; i < schur_withFillIn->numRows; ++i) {
234     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
235     j=schur_withFillIn->pattern->index[iPtr];
236     fprintf(stderr,"A_CC[%d,%d]=%.2f ",i,j,schur_withFillIn->val[iPtr]);
237     }
238     fprintf(stderr,"\n");
239     }*/
240 jfenwick 1851 time0=Paso_timer()-time0;
241     if (Paso_noError()) {
242     time1=Paso_timer();
243     /* update A_CC block to get Schur complement and then apply AMG to it */
244 artak 1881 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
245 jfenwick 1851 time1=Paso_timer()-time1;
246 artak 1881 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,verbose);
247    
248 jfenwick 1851 Paso_SparseMatrix_free(schur);
249 artak 1881 Paso_SparseMatrix_free(schur_withFillIn);
250 jfenwick 1851 }
251     /* allocate work arrays for AMG application */
252     if (Paso_noError()) {
253     out->x_F=MEMALLOC(n_block*out->n_F,double);
254     out->b_F=MEMALLOC(n_block*out->n_F,double);
255     out->x_C=MEMALLOC(n_block*out->n_C,double);
256     out->b_C=MEMALLOC(n_block*out->n_C,double);
257     if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
258     #pragma omp parallel
259     {
260     #pragma omp for private(i,k) schedule(static)
261     for (i = 0; i < out->n_F; ++i) {
262     for (k=0; k<n_block;++k) {
263     out->x_F[i*n_block+k]=0.;
264     out->b_F[i*n_block+k]=0.;
265     }
266     }
267     #pragma omp for private(i,k) schedule(static)
268     for (i = 0; i < out->n_C; ++i) {
269     for (k=0; k<n_block;++k) {
270     out->x_C[i*n_block+k]=0.;
271     out->b_C[i*n_block+k]=0.;
272     }
273     }
274     } /* end parallel region */
275     }
276     }
277     }
278     }
279     }
280     }
281     }
282     }
283     }
284     }
285     TMPMEMFREE(mis_marker);
286     TMPMEMFREE(counter);
287 artak 1881 TMPMEMFREE(rs);
288 jfenwick 1851 if (Paso_noError()) {
289     if (verbose) {
290     printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
291     if (out->n_C>0) {
292     printf("timing: AMG: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
293     } else {
294     printf("timing: AMG: MIS: %e\n",time2);
295     }
296     }
297     return out;
298     } else {
299     Paso_Solver_AMG_free(out);
300     return NULL;
301     }
302     }
303    
304     /**************************************************************/
305    
306     /* apply AMG precondition b-> x
307    
308     in fact it solves
309    
310     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
311     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
312    
313     in the form
314    
315     b->[b_F,b_C]
316     x_F=invA_FF*b_F
317     b_C=b_C-A_CF*x_F
318     x_C=AMG(b_C)
319     b_F=b_F-A_FC*x_C
320     x_F=invA_FF*b_F
321     x<-[x_F,x_C]
322    
323     should be called within a parallel region
324     barrier synconization should be performed to make sure that the input vector available
325    
326     */
327    
328     void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
329     dim_t i,k;
330     dim_t n_block=amg->n_block;
331 artak 1881
332 jfenwick 1851 if (amg->n_C==0) {
333     /* x=invA_FF*b */
334     Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,x,b);
335     } else {
336 artak 1881 /* presmoothing on (Shure, x, b, r) */
337 jfenwick 1851 /* b->[b_F,b_C] */
338     if (n_block==1) {
339     #pragma omp parallel for private(i) schedule(static)
340     for (i=0;i<amg->n_F;++i) amg->b_F[i]=b[amg->rows_in_F[i]];
341     #pragma omp parallel for private(i) schedule(static)
342     for (i=0;i<amg->n_C;++i) amg->b_C[i]=b[amg->rows_in_C[i]];
343     } else {
344     #pragma omp parallel for private(i,k) schedule(static)
345     for (i=0;i<amg->n_F;++i)
346     for (k=0;k<n_block;k++) amg->b_F[amg->n_block*i+k]=b[n_block*amg->rows_in_F[i]+k];
347     #pragma omp parallel for private(i,k) schedule(static)
348     for (i=0;i<amg->n_C;++i)
349     for (k=0;k<n_block;k++) amg->b_C[amg->n_block*i+k]=b[n_block*amg->rows_in_C[i]+k];
350     }
351     /* x_F=invA_FF*b_F */
352     Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
353     /* b_C=b_C-A_CF*x_F */
354     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
355     /* x_C=AMG(b_C) */
356     Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
357     /* b_F=b_F-A_FC*x_C */
358     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
359     /* x_F=invA_FF*b_F */
360     Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
361     /* x<-[x_F,x_C] */
362     if (n_block==1) {
363     #pragma omp parallel for private(i) schedule(static)
364     for (i=0;i<amg->n;++i) {
365     if (amg->mask_C[i]>-1) {
366     x[i]=amg->x_C[amg->mask_C[i]];
367     } else {
368     x[i]=amg->x_F[amg->mask_F[i]];
369     }
370     }
371     } else {
372     #pragma omp parallel for private(i,k) schedule(static)
373     for (i=0;i<amg->n;++i) {
374     if (amg->mask_C[i]>-1) {
375     for (k=0;k<n_block;k++) x[n_block*i+k]=amg->x_C[n_block*amg->mask_C[i]+k];
376     } else {
377     for (k=0;k<n_block;k++) x[n_block*i+k]=amg->x_F[n_block*amg->mask_F[i]+k];
378     }
379     }
380     }
381     /* all done */
382     }
383     return;
384     }
385    
386     /*
387     * $Log$
388     *
389     */

  ViewVC Help
Powered by ViewVC 1.1.26