/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 430 - (show annotations)
Wed Jan 11 06:40:50 2006 UTC (13 years, 9 months ago) by gross
Original Path: trunk/paso/src/Solvers/Solver_ILU.c
File MIME type: text/plain
File size: 15658 byte(s)
ILU has been replicated is called RILU (recursive ILU) now. ILU will now be reimplemented.
1 /* $Id$ */
2
3 /**************************************************************/
4
5 /* Paso: ILU preconditioner with reordering */
6
7 /**************************************************************/
8
9 /* Copyrights by ACcESS Australia 2003,2004,2005 */
10 /* Author: gross@access.edu.au */
11
12 /**************************************************************/
13
14 #include "Paso.h"
15 #include "Solver.h"
16 #include "Util.h"
17
18 /**************************************************************/
19
20 /* free all memory used by ILU */
21
22 void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
23 if (in!=NULL) {
24 Paso_Solver_ILU_free(in->ILU_of_Schur);
25 MEMFREE(in->inv_A_FF);
26 MEMFREE(in->A_FF_pivot);
27 Paso_SystemMatrix_dealloc(in->A_FC);
28 Paso_SystemMatrix_dealloc(in->A_CF);
29 MEMFREE(in->rows_in_F);
30 MEMFREE(in->rows_in_C);
31 MEMFREE(in->mask_F);
32 MEMFREE(in->mask_C);
33 MEMFREE(in->x_F);
34 MEMFREE(in->b_F);
35 MEMFREE(in->x_C);
36 MEMFREE(in->b_C);
37 MEMFREE(in);
38 }
39 }
40
41 /**************************************************************/
42
43 /* constructs the block-block factorization of
44
45 [ A_FF A_FC ]
46 A_p=
47 [ A_CF A_FF ]
48
49 to
50
51 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
52 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
53
54
55 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
56
57 then ILU is applied to S again until S becomes empty
58
59 */
60 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A_p,bool_t verbose) {
61 Paso_Solver_ILU* out=NULL;
62 dim_t n=A_p->num_rows;
63 dim_t n_block=A_p->row_block_size;
64 index_t* mis_marker=NULL;
65 index_t* counter=NULL;
66 index_t iPtr,*index, *where_p;
67 dim_t i,k;
68 Paso_SystemMatrix * schur=NULL;
69 double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
70
71
72 /* identify independend set of rows/columns */
73 mis_marker=TMPMEMALLOC(n,index_t);
74 counter=TMPMEMALLOC(n,index_t);
75 out=MEMALLOC(1,Paso_Solver_ILU);
76 out->ILU_of_Schur=NULL;
77 out->inv_A_FF=NULL;
78 out->A_FF_pivot=NULL;
79 out->A_FC=NULL;
80 out->A_CF=NULL;
81 out->rows_in_F=NULL;
82 out->rows_in_C=NULL;
83 out->mask_F=NULL;
84 out->mask_C=NULL;
85 out->x_F=NULL;
86 out->b_F=NULL;
87 out->x_C=NULL;
88 out->b_C=NULL;
89
90 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
91 /* identify independend set of rows/columns */
92 time0=Paso_timer();
93 Paso_SystemMatrixPattern_mis(A_p->pattern,mis_marker);
94 time2=Paso_timer()-time0;
95 if (Paso_noError()) {
96 #pragma omp parallel for private(i) schedule(static)
97 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
98 out->n=n;
99 out->n_block=n_block;
100 out->n_F=Paso_Util_cumsum(n,counter);
101 out->mask_F=MEMALLOC(n,index_t);
102 out->rows_in_F=MEMALLOC(out->n_F,index_t);
103 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
104 out->A_FF_pivot=NULL; /* later use for block size>3 */
105 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
106 #pragma omp parallel
107 {
108 /* creates an index for F from mask */
109 #pragma omp for private(i) schedule(static)
110 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
111 #pragma omp for private(i) schedule(static)
112 for (i = 0; i < n; ++i) {
113 if (mis_marker[i]) {
114 out->rows_in_F[counter[i]]=i;
115 out->mask_F[i]=counter[i];
116 } else {
117 out->mask_F[i]=-1;
118 }
119 }
120 #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
121 for (i = 0; i < out->n_F; i++) {
122 /* find main diagonal */
123 iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
124 index=&(A_p->pattern->index[iPtr]);
125 where_p=(index_t*)bsearch(&out->rows_in_F[i],
126 index,
127 A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
128 sizeof(index_t),
129 Paso_comparIndex);
130 if (where_p==NULL) {
131 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: main diagonal element missing.");
132 } else {
133 iPtr+=(index_t)(where_p-index);
134 /* get inverse of A_FF block: */
135 if (n_block==1) {
136 if (ABS(A_p->val[iPtr])>0.) {
137 out->inv_A_FF[i]=1./A_p->val[iPtr];
138 } else {
139 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: Break-down in ILU decomposition: non-regular main diagonal block.");
140 }
141 } else if (n_block==2) {
142 A11=A_p->val[iPtr*4];
143 A21=A_p->val[iPtr*4+1];
144 A12=A_p->val[iPtr*4+2];
145 A22=A_p->val[iPtr*4+3];
146 D = A11*A22-A12*A21;
147 if (ABS(D) > 0 ){
148 D=1./D;
149 out->inv_A_FF[i*4]= A22*D;
150 out->inv_A_FF[i*4+1]=-A21*D;
151 out->inv_A_FF[i*4+2]=-A12*D;
152 out->inv_A_FF[i*4+3]= A11*D;
153 } else {
154 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU:Break-down in ILU decomposition: non-regular main diagonal block.");
155 }
156 } else if (n_block==3) {
157 A11=A_p->val[iPtr*9 ];
158 A21=A_p->val[iPtr*9+1];
159 A31=A_p->val[iPtr*9+2];
160 A12=A_p->val[iPtr*9+3];
161 A22=A_p->val[iPtr*9+4];
162 A32=A_p->val[iPtr*9+5];
163 A13=A_p->val[iPtr*9+6];
164 A23=A_p->val[iPtr*9+7];
165 A33=A_p->val[iPtr*9+8];
166 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
167 if (ABS(D) > 0 ){
168 D=1./D;
169 out->inv_A_FF[i*9 ]=(A22*A33-A23*A32)*D;
170 out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
171 out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
172 out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
173 out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
174 out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
175 out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
176 out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
177 out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
178 } else {
179 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU:Break-down in ILU decomposition: non-regular main diagonal block.");
180 }
181 }
182 }
183 }
184 } /* end parallel region */
185
186 if( Paso_noError()) {
187 /* if there are no nodes in the coarse level there is no more work to do */
188 out->n_C=n-out->n_F;
189 if (out->n_C>0) {
190 out->rows_in_C=MEMALLOC(out->n_C,index_t);
191 out->mask_C=MEMALLOC(n,index_t);
192 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
193 /* creates an index for C from mask */
194 #pragma omp parallel for private(i) schedule(static)
195 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
196 Paso_Util_cumsum(n,counter);
197 #pragma omp parallel
198 {
199 #pragma omp for private(i) schedule(static)
200 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
201 #pragma omp for private(i) schedule(static)
202 for (i = 0; i < n; ++i) {
203 if (! mis_marker[i]) {
204 out->rows_in_C[counter[i]]=i;
205 out->mask_C[i]=counter[i];
206 } else {
207 out->mask_C[i]=-1;
208 }
209 }
210 } /* end parallel region */
211 /* get A_CF block: */
212 out->A_CF=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_F);
213 if (Paso_noError()) {
214 /* get A_FC block: */
215 out->A_FC=Paso_SystemMatrix_getSubmatrix(A_p,out->n_F,out->rows_in_F,out->mask_C);
216 /* get A_FF block: */
217 if (Paso_noError()) {
218 schur=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_C);
219 time0=Paso_timer()-time0;
220 if (Paso_noError()) {
221 time1=Paso_timer();
222 /* update A_CC block to get Schur complement and then apply ILU to it */
223 Paso_Solver_updateIncompleteSchurComplement(schur,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
224 time1=Paso_timer()-time1;
225 out->ILU_of_Schur=Paso_Solver_getILU(schur,verbose);
226 Paso_SystemMatrix_dealloc(schur);
227 }
228 /* allocate work arrays for ILU application */
229 if (Paso_noError()) {
230 out->x_F=MEMALLOC(n_block*out->n_F,double);
231 out->b_F=MEMALLOC(n_block*out->n_F,double);
232 out->x_C=MEMALLOC(n_block*out->n_C,double);
233 out->b_C=MEMALLOC(n_block*out->n_C,double);
234 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
235 #pragma omp parallel
236 {
237 #pragma omp for private(i,k) schedule(static)
238 for (i = 0; i < out->n_F; ++i) {
239 for (k=0; k<n_block;++k) {
240 out->x_F[i*n_block+k]=0.;
241 out->b_F[i*n_block+k]=0.;
242 }
243 }
244 #pragma omp for private(i,k) schedule(static)
245 for (i = 0; i < out->n_C; ++i) {
246 for (k=0; k<n_block;++k) {
247 out->x_C[i*n_block+k]=0.;
248 out->b_C[i*n_block+k]=0.;
249 }
250 }
251 } /* end parallel region */
252 }
253 }
254 }
255 }
256 }
257 }
258 }
259 }
260 }
261 }
262 TMPMEMFREE(mis_marker);
263 TMPMEMFREE(counter);
264 if (Paso_noError()) {
265 if (verbose) {
266 printf("ILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
267 if (out->n_C>0) {
268 printf("timing: ILU: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
269 } else {
270 printf("timing: ILU: MIS: %e\n",time2);
271 }
272 }
273 return out;
274 } else {
275 Paso_Solver_ILU_free(out);
276 return NULL;
277 }
278 }
279
280 /**************************************************************/
281
282 /* apply ILU precondition b-> x
283
284 in fact it solves
285
286 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
287 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
288
289 in the form
290
291 b->[b_F,b_C]
292 x_F=invA_FF*b_F
293 b_C=b_C-A_CF*x_F
294 x_C=ILU(b_C)
295 b_F=b_F-A_FC*x_C
296 x_F=invA_FF*b_F
297 x<-[x_F,x_C]
298
299 should be called within a parallel region
300 barrier synconization should be performed to make sure that the input vector available
301
302 */
303
304 void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
305 dim_t i,k;
306 dim_t n_block=ilu->n_block;
307
308 if (ilu->n_C==0) {
309 /* x=invA_FF*b */
310 Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,x,b);
311 } else {
312 /* b->[b_F,b_C] */
313 if (n_block==1) {
314 #pragma omp for private(i) schedule(static)
315 for (i=0;i<ilu->n_F;++i) ilu->b_F[i]=b[ilu->rows_in_F[i]];
316 #pragma omp for private(i) schedule(static)
317 for (i=0;i<ilu->n_C;++i) ilu->b_C[i]=b[ilu->rows_in_C[i]];
318 } else {
319 #pragma omp for private(i,k) schedule(static)
320 for (i=0;i<ilu->n_F;++i)
321 for (k=0;k<n_block;k++) ilu->b_F[ilu->n_block*i+k]=b[n_block*ilu->rows_in_F[i]+k];
322 #pragma omp for private(i,k) schedule(static)
323 for (i=0;i<ilu->n_C;++i)
324 for (k=0;k<n_block;k++) ilu->b_C[ilu->n_block*i+k]=b[n_block*ilu->rows_in_C[i]+k];
325 }
326 /* x_F=invA_FF*b_F */
327 Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,ilu->x_F,ilu->b_F);
328 /* b_C=b_C-A_CF*x_F */
329 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,ilu->A_CF,ilu->x_F,1.,ilu->b_C);
330 /* x_C=ILU(b_C) */
331 Paso_Solver_solveILU(ilu->ILU_of_Schur,ilu->x_C,ilu->b_C);
332 /* b_F=b_F-A_FC*x_C */
333 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,ilu->A_FC,ilu->x_C,1.,ilu->b_F);
334 /* x_F=invA_FF*b_F */
335 Paso_Solver_applyBlockDiagonalMatrix(n_block,ilu->n_F,ilu->inv_A_FF,ilu->A_FF_pivot,ilu->x_F,ilu->b_F);
336 /* x<-[x_F,x_C] */
337 if (n_block==1) {
338 #pragma omp for private(i) schedule(static)
339 for (i=0;i<ilu->n;++i) {
340 if (ilu->mask_C[i]>-1) {
341 x[i]=ilu->x_C[ilu->mask_C[i]];
342 } else {
343 x[i]=ilu->x_F[ilu->mask_F[i]];
344 }
345 }
346 } else {
347 #pragma omp for private(i,k) schedule(static)
348 for (i=0;i<ilu->n;++i) {
349 if (ilu->mask_C[i]>-1) {
350 for (k=0;k<n_block;k++) x[n_block*i+k]=ilu->x_C[n_block*ilu->mask_C[i]+k];
351 } else {
352 for (k=0;k<n_block;k++) x[n_block*i+k]=ilu->x_F[n_block*ilu->mask_F[i]+k];
353 }
354 }
355 }
356 /* all done */
357 }
358 return;
359 }
360
361 /*
362 * $Log$
363 * Revision 1.2 2005/09/15 03:44:40 jgs
364 * Merge of development branch dev-02 back to main trunk on 2005-09-15
365 *
366 * Revision 1.1.2.1 2005/09/05 06:29:50 gross
367 * These files have been extracted from finley to define a stand alone libray for iterative
368 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
369 * has not been tested yet.
370 *
371 *
372 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26