/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 415 - (hide annotations)
Wed Jan 4 05:37:33 2006 UTC (13 years, 8 months ago) by gross
Original Path: trunk/paso/src/Solvers/Solver_ILU.c
File MIME type: text/plain
File size: 15618 byte(s)
a better way of representing the matrix format type is introduced. this is needed for the Paradiso and UMFPACK interface
1 jgs 150 /* $Id$ */
2    
3     /**************************************************************/
4    
5     /* Paso: ILU preconditioner with reordering */
6    
7     /**************************************************************/
8    
9     /* Copyrights by ACcESS Australia 2003,2004,2005 */
10     /* Author: gross@access.edu.au */
11    
12     /**************************************************************/
13    
14     #include "Paso.h"
15     #include "Solver.h"
16     #include "Util.h"
17    
18     /**************************************************************/
19    
20     /* free all memory used by ILU */
21    
22     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
23     if (in!=NULL) {
24     Paso_Solver_ILU_free(in->ILU_of_Schur);
25     MEMFREE(in->inv_A_FF);
26     MEMFREE(in->A_FF_pivot);
27     Paso_SystemMatrix_dealloc(in->A_FC);
28     Paso_SystemMatrix_dealloc(in->A_CF);
29     MEMFREE(in->rows_in_F);
30     MEMFREE(in->rows_in_C);
31     MEMFREE(in->mask_F);
32     MEMFREE(in->mask_C);
33     MEMFREE(in->x_F);
34     MEMFREE(in->b_F);
35     MEMFREE(in->x_C);
36     MEMFREE(in->b_C);
37     MEMFREE(in);
38     }
39     }
40    
41     /**************************************************************/
42    
43     /* constructs the block-block factorization of
44    
45     [ A_FF A_FC ]
46     A_p=
47     [ A_CF A_FF ]
48    
49     to
50    
51     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
52     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
53    
54    
55     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
56    
57     then ILU is applied to S again until S becomes empty
58    
59     */
60     Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A_p,bool_t verbose) {
61     Paso_Solver_ILU* out=NULL;
62     dim_t n=A_p->num_rows;
63     dim_t n_block=A_p->row_block_size;
64     index_t* mis_marker=NULL;
65     index_t* counter=NULL;
66     index_t iPtr,*index, *where_p;
67     dim_t i,k;
68     Paso_SystemMatrix * schur=NULL;
69     double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
70    
71    
72     /* identify independend set of rows/columns */
73     mis_marker=TMPMEMALLOC(n,index_t);
74     counter=TMPMEMALLOC(n,index_t);
75     out=MEMALLOC(1,Paso_Solver_ILU);
76     out->ILU_of_Schur=NULL;
77     out->inv_A_FF=NULL;
78     out->A_FF_pivot=NULL;
79     out->A_FC=NULL;
80     out->A_CF=NULL;
81     out->rows_in_F=NULL;
82     out->rows_in_C=NULL;
83     out->mask_F=NULL;
84     out->mask_C=NULL;
85     out->x_F=NULL;
86     out->b_F=NULL;
87     out->x_C=NULL;
88     out->b_C=NULL;
89    
90     if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
91     /* identify independend set of rows/columns */
92     time0=Paso_timer();
93     Paso_SystemMatrixPattern_mis(A_p->pattern,mis_marker);
94     time2=Paso_timer()-time0;
95     if (Paso_noError()) {
96     #pragma omp parallel for private(i) schedule(static)
97     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
98     out->n=n;
99     out->n_block=n_block;
100     out->n_F=Paso_Util_cumsum(n,counter);
101     out->mask_F=MEMALLOC(n,index_t);
102     out->rows_in_F=MEMALLOC(out->n_F,index_t);
103     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
104     out->A_FF_pivot=NULL; /* later use for block size>3 */
105     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
106     #pragma omp parallel
107     {
108     /* creates an index for F from mask */
109     #pragma omp for private(i) schedule(static)
110     for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
111     #pragma omp for private(i) schedule(static)
112     for (i = 0; i < n; ++i) {
113     if (mis_marker[i]) {
114     out->rows_in_F[counter[i]]=i;
115     out->mask_F[i]=counter[i];
116     } else {
117     out->mask_F[i]=-1;
118     }
119     }
120     #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
121     for (i = 0; i < out->n_F; i++) {
122     /* find main diagonal */
123     iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
124     index=&(A_p->pattern->index[iPtr]);
125     where_p=(index_t*)bsearch(&out->rows_in_F[i],
126     index,
127     A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
128     sizeof(index_t),
129     Paso_comparIndex);
130     if (where_p==NULL) {
131     Paso_setError(VALUE_ERROR, "__FILE__: main diagonal element missing.");
132     } else {
133     iPtr+=(index_t)(where_p-index);
134     /* get inverse of A_FF block: */
135     if (n_block==1) {
136     if (ABS(A_p->val[iPtr])>0.) {
137     out->inv_A_FF[i]=1./A_p->val[iPtr];
138     } else {
139     Paso_setError(ZERO_DIVISION_ERROR, "__FILE__: Break-down in ILU decomposition: non-regular main diagonal block.");
140     }
141     } else if (n_block==2) {
142     A11=A_p->val[iPtr*4];
143     A21=A_p->val[iPtr*4+1];
144     A12=A_p->val[iPtr*4+2];
145     A22=A_p->val[iPtr*4+3];
146     D = A11*A22-A12*A21;
147     if (ABS(D) > 0 ){
148     D=1./D;
149     out->inv_A_FF[i*4]= A22*D;
150     out->inv_A_FF[i*4+1]=-A21*D;
151     out->inv_A_FF[i*4+2]=-A12*D;
152     out->inv_A_FF[i*4+3]= A11*D;
153     } else {
154     Paso_setError(ZERO_DIVISION_ERROR, "__FILE__:Break-down in ILU decomposition: non-regular main diagonal block.");
155     }
156     } else if (n_block==3) {
157     A11=A_p->val[iPtr*9 ];
158     A21=A_p->val[iPtr*9+1];
159     A31=A_p->val[iPtr*9+2];
160     A12=A_p->val[iPtr*9+3];
161     A22=A_p->val[iPtr*9+4];
162     A32=A_p->val[iPtr*9+5];
163     A13=A_p->val[iPtr*9+6];
164     A23=A_p->val[iPtr*9+7];
165     A33=A_p->val[iPtr*9+8];
166     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
167     if (ABS(D) > 0 ){
168     D=1./D;
169     out->inv_A_FF[i*9 ]=(A22*A33-A23*A32)*D;
170     out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
171     out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
172     out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
173     out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
174     out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
175     out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
176     out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
177     out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
178     } else {
179     Paso_setError(ZERO_DIVISION_ERROR, "__FILE__:Break-down in ILU decomposition: non-regular main diagonal block.");
180     }
181     }
182     }
183     }
184     } /* end parallel region */
185    
186     if( Paso_noError()) {
187     /* if there are no nodes in the coarse level there is no more work to do */
188     out->n_C=n-out->n_F;
189     if (out->n_C>0) {
190     out->rows_in_C=MEMALLOC(out->n_C,index_t);
191     out->mask_C=MEMALLOC(n,index_t);
192     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
193     /* creates an index for C from mask */
194     #pragma omp parallel for private(i) schedule(static)
195     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
196     Paso_Util_cumsum(n,counter);
197     #pragma omp parallel
198     {
199     #pragma omp for private(i) schedule(static)
200     for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
201     #pragma omp for private(i) schedule(static)
202     for (i = 0; i < n; ++i) {
203     if (! mis_marker[i]) {
204     out->rows_in_C[counter[i]]=i;
205     out->mask_C[i]=counter[i];
206     } else {
207     out->mask_C[i]=-1;
208     }
209     }
210     } /* end parallel region */
211     /* get A_CF block: */
212     out->A_CF=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_F);
213     if (Paso_noError()) {
214     /* get A_FC block: */
215     out->A_FC=Paso_SystemMatrix_getSubmatrix(A_p,out->n_F,out->rows_in_F,out->mask_C);
216     /* get A_FF block: */
217     if (Paso_noError()) {
218     schur=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_C);
219     time0=Paso_timer()-time0;
220     if (Paso_noError()) {
221     time1=Paso_timer();
222     /* update A_CC block to get Schur complement and then apply ILU to it */
223     Paso_Solver_updateIncompleteSchurComplement(schur,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
224     time1=Paso_timer()-time1;
225     out->ILU_of_Schur=Paso_Solver_getILU(schur,verbose);
226     Paso_SystemMatrix_dealloc(schur);
227     }
228     /* allocate work arrays for ILU application */
229     if (Paso_noError()) {
230     out->x_F=MEMALLOC(n_block*out->n_F,double);
231     out->b_F=MEMALLOC(n_block*out->n_F,double);
232     out->x_C=MEMALLOC(n_block*out->n_C,double);
233     out->b_C=MEMALLOC(n_block*out->n_C,double);
234     if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
235     #pragma omp parallel
236     {
237     #pragma omp for private(i,k) schedule(static)
238     for (i = 0; i < out->n_F; ++i) {
239     for (k=0; k<n_block;++k) {
240     out->x_F[i*n_block+k]=0.;
241     out->b_F[i*n_block+k]=0.;
242     }
243     }
244     #pragma omp for private(i,k) schedule(static)
245     for (i = 0; i < out->n_C; ++i) {
246     for (k=0; k<n_block;++k) {
247     out->x_C[i*n_block+k]=0.;
248     out->b_C[i*n_block+k]=0.;
249     }
250     }
251     } /* end parallel region */
252     }
253     }
254     }
255     }
256     }
257     }
258     }
259     }
260     }
261     }
262     TMPMEMFREE(mis_marker);
263     TMPMEMFREE(counter);
264     if (Paso_noError()) {
265     if (verbose) {
266     printf("ILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
267     if (out->n_C>0) {
268     printf("timing: ILU: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
269     } else {
270     printf("timing: ILU: MIS: %e\n",time2);
271     }
272     }
273     return out;
274     } else {
275     Paso_Solver_ILU_free(out);
276     return NULL;
277     }
278     }
279    
280     /**************************************************************/
281    
282     /* apply ILU precondition b-> x
283    
284     in fact it solves
285    
286     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
287     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
288    
289     in the form
290    
291     b->[b_F,b_C]
292     x_F=invA_FF*b_F
293     b_C=b_C-A_CF*x_F
294     x_C=ILU(b_C)
295     b_F=b_F-A_FC*x_C
296     x_F=invA_FF*b_F
297     x<-[x_F,x_C]
298    
299     should be called within a parallel region
300     barrier synconization should be performed to make sure that the input vector available
301    
302     */
303    
304     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
305     dim_t i,k;
306     dim_t n_block=ilu->n_block;
307    
308     if (ilu->n_C==0) {
309     /* x=invA_FF*b */
310     Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,x,b);
311     } else {
312     /* b->[b_F,b_C] */
313     if (n_block==1) {
314     #pragma omp for private(i) schedule(static)
315     for (i=0;i<ilu->n_F;++i) ilu->b_F[i]=b[ilu->rows_in_F[i]];
316     #pragma omp for private(i) schedule(static)
317     for (i=0;i<ilu->n_C;++i) ilu->b_C[i]=b[ilu->rows_in_C[i]];
318     } else {
319     #pragma omp for private(i,k) schedule(static)
320     for (i=0;i<ilu->n_F;++i)
321     for (k=0;k<n_block;k++) ilu->b_F[ilu->n_block*i+k]=b[n_block*ilu->rows_in_F[i]+k];
322     #pragma omp for private(i,k) schedule(static)
323     for (i=0;i<ilu->n_C;++i)
324     for (k=0;k<n_block;k++) ilu->b_C[ilu->n_block*i+k]=b[n_block*ilu->rows_in_C[i]+k];
325     }
326     /* x_F=invA_FF*b_F */
327     Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,ilu->x_F,ilu->b_F);
328     /* b_C=b_C-A_CF*x_F */
329 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,ilu->A_CF,ilu->x_F,1.,ilu->b_C);
330 jgs 150 /* x_C=ILU(b_C) */
331     Paso_Solver_solveILU(ilu->ILU_of_Schur,ilu->x_C,ilu->b_C);
332     /* b_F=b_F-A_FC*x_C */
333 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,ilu->A_FC,ilu->x_C,1.,ilu->b_F);
334 jgs 150 /* x_F=invA_FF*b_F */
335     Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,ilu->x_F,ilu->b_F);
336     /* x<-[x_F,x_C] */
337     if (n_block==1) {
338     #pragma omp for private(i) schedule(static)
339     for (i=0;i<ilu->n;++i) {
340     if (ilu->mask_C[i]>-1) {
341     x[i]=ilu->x_C[ilu->mask_C[i]];
342     } else {
343     x[i]=ilu->x_F[ilu->mask_F[i]];
344     }
345     }
346     } else {
347     #pragma omp for private(i,k) schedule(static)
348     for (i=0;i<ilu->n;++i) {
349     if (ilu->mask_C[i]>-1) {
350     for (k=0;k<n_block;k++) x[n_block*i+k]=ilu->x_C[n_block*ilu->mask_C[i]+k];
351     } else {
352     for (k=0;k<n_block;k++) x[n_block*i+k]=ilu->x_F[n_block*ilu->mask_F[i]+k];
353     }
354     }
355     }
356     /* all done */
357     }
358     return;
359     }
360    
361     /*
362     * $Log$
363     * Revision 1.2 2005/09/15 03:44:40 jgs
364     * Merge of development branch dev-02 back to main trunk on 2005-09-15
365     *
366     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
367     * These files have been extracted from finley to define a stand alone libray for iterative
368     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
369     * has not been tested yet.
370     *
371     *
372     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26