/[escript]/trunk/paso/src/ILU.c
ViewVC logotype

Contents of /trunk/paso/src/ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3094 - (show annotations)
Fri Aug 13 08:38:06 2010 UTC (9 years, 7 months ago) by gross
Original Path: trunk/paso/src/Solver_ILU.c
File MIME type: text/plain
File size: 24236 byte(s)
The MPI and sequational GAUSS_SEIDEL have been merged.
The couring and main diagonal pointer is now manged by the patternm which means that they are calculated once only even if the preconditioner is deleted.



1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: ILU preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: Lutz Gross, l.gross@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 /**************************************************************/
31
32 /* free all memory used by ILU */
33
34 void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
35 if (in!=NULL) {
36 MEMFREE(in->factors);
37 MEMFREE(in);
38 }
39 }
40
41 /**************************************************************/
42
43 /* constructs the incomplete block factorization of
44
45 */
46 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SparseMatrix * A,bool_t verbose) {
47 const dim_t n=A->numRows;
48 const dim_t n_block=A->row_block_size;
49 const index_t* colorOf = Paso_Pattern_borrowColoringPointer(A->pattern);
50 const dim_t num_colors = Paso_Pattern_getNumColors(A->pattern);
51 const index_t *ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A);
52 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
53 register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
54 register index_t i,iptr_main,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
55 double time0=0,time_fac=0;
56 /* allocations: */
57 Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
58 if (Paso_checkPtr(out)) return NULL;
59 out->factors=MEMALLOC(A->len,double);
60
61 if ( ! Paso_checkPtr(out->factors) ) {
62
63 time0=Paso_timer();
64 /* start factorization */
65
66 #pragma omp barrier
67 for (color=0;color<num_colors && Paso_noError();++color) {
68 if (n_block==1) {
69 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
70 for (i = 0; i < n; ++i) {
71 if (colorOf[i]==color) {
72 for (color2=0;color2<color;++color2) {
73 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
74 k=A->pattern->index[iptr_ik];
75 if (colorOf[k]==color2) {
76 A11=out->factors[iptr_ik];
77 /* a_ij=a_ij-a_ik*a_kj */
78 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
79 j=A->pattern->index[iptr_kj];
80 if (colorOf[j]>color2) {
81 S11=out->factors[iptr_kj];
82 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
83 if (j==A->pattern->index[iptr_ij]) {
84 out->factors[iptr_ij]-=A11*S11;
85 break;
86 }
87 }
88 }
89 }
90 }
91 }
92 }
93 iptr_main=ptr_main[i];
94 D=out->factors[iptr_main];
95 if (ABS(D)>0.) {
96 D=1./D;
97 out->factors[iptr_main]=D;
98 /* a_ik=a_ii^{-1}*a_ik */
99 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
100 k=A->pattern->index[iptr_ik];
101 if (colorOf[k]>color) {
102 A11=out->factors[iptr_ik];
103 out->factors[iptr_ik]=A11*D;
104 }
105 }
106 } else {
107 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
108 }
109 }
110 }
111 } else if (n_block==2) {
112 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
113 for (i = 0; i < n; ++i) {
114 if (colorOf[i]==color) {
115 for (color2=0;color2<color;++color2) {
116 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
117 k=A->pattern->index[iptr_ik];
118 if (colorOf[k]==color2) {
119 A11=out->factors[iptr_ik*4 ];
120 A21=out->factors[iptr_ik*4+1];
121 A12=out->factors[iptr_ik*4+2];
122 A22=out->factors[iptr_ik*4+3];
123 /* a_ij=a_ij-a_ik*a_kj */
124 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
125 j=A->pattern->index[iptr_kj];
126 if (colorOf[j]>color2) {
127 S11=out->factors[iptr_kj*4];
128 S21=out->factors[iptr_kj*4+1];
129 S12=out->factors[iptr_kj*4+2];
130 S22=out->factors[iptr_kj*4+3];
131 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
132 if (j==A->pattern->index[iptr_ij]) {
133 out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
134 out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
135 out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
136 out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
137 break;
138 }
139 }
140 }
141 }
142 }
143 }
144 }
145 iptr_main=ptr_main[i];
146 A11=out->factors[iptr_main*4];
147 A21=out->factors[iptr_main*4+1];
148 A12=out->factors[iptr_main*4+2];
149 A22=out->factors[iptr_main*4+3];
150 D = A11*A22-A12*A21;
151 if (ABS(D)>0.) {
152 D=1./D;
153 S11= A22*D;
154 S21=-A21*D;
155 S12=-A12*D;
156 S22= A11*D;
157 out->factors[iptr_main*4] = S11;
158 out->factors[iptr_main*4+1]= S21;
159 out->factors[iptr_main*4+2]= S12;
160 out->factors[iptr_main*4+3]= S22;
161 /* a_ik=a_ii^{-1}*a_ik */
162 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
163 k=A->pattern->index[iptr_ik];
164 if (colorOf[k]>color) {
165 A11=out->factors[iptr_ik*4 ];
166 A21=out->factors[iptr_ik*4+1];
167 A12=out->factors[iptr_ik*4+2];
168 A22=out->factors[iptr_ik*4+3];
169 out->factors[4*iptr_ik ]=S11*A11+S12*A21;
170 out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
171 out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
172 out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
173 }
174 }
175 } else {
176 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
177 }
178 }
179 }
180 } else if (n_block==3) {
181 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
182 for (i = 0; i < n; ++i) {
183 if (colorOf[i]==color) {
184 for (color2=0;color2<color;++color2) {
185 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
186 k=A->pattern->index[iptr_ik];
187 if (colorOf[k]==color2) {
188 A11=out->factors[iptr_ik*9 ];
189 A21=out->factors[iptr_ik*9+1];
190 A31=out->factors[iptr_ik*9+2];
191 A12=out->factors[iptr_ik*9+3];
192 A22=out->factors[iptr_ik*9+4];
193 A32=out->factors[iptr_ik*9+5];
194 A13=out->factors[iptr_ik*9+6];
195 A23=out->factors[iptr_ik*9+7];
196 A33=out->factors[iptr_ik*9+8];
197 /* a_ij=a_ij-a_ik*a_kj */
198 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
199 j=A->pattern->index[iptr_kj];
200 if (colorOf[j]>color2) {
201 S11=out->factors[iptr_kj*9 ];
202 S21=out->factors[iptr_kj*9+1];
203 S31=out->factors[iptr_kj*9+2];
204 S12=out->factors[iptr_kj*9+3];
205 S22=out->factors[iptr_kj*9+4];
206 S32=out->factors[iptr_kj*9+5];
207 S13=out->factors[iptr_kj*9+6];
208 S23=out->factors[iptr_kj*9+7];
209 S33=out->factors[iptr_kj*9+8];
210 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
211 if (j==A->pattern->index[iptr_ij]) {
212 out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
213 out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
214 out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
215 out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
216 out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
217 out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
218 out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
219 out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
220 out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
221 break;
222 }
223 }
224 }
225 }
226 }
227 }
228 }
229 iptr_main=ptr_main[i];
230 A11=out->factors[iptr_main*9 ];
231 A21=out->factors[iptr_main*9+1];
232 A31=out->factors[iptr_main*9+2];
233 A12=out->factors[iptr_main*9+3];
234 A22=out->factors[iptr_main*9+4];
235 A32=out->factors[iptr_main*9+5];
236 A13=out->factors[iptr_main*9+6];
237 A23=out->factors[iptr_main*9+7];
238 A33=out->factors[iptr_main*9+8];
239 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
240 if (ABS(D)>0.) {
241 D=1./D;
242 S11=(A22*A33-A23*A32)*D;
243 S21=(A31*A23-A21*A33)*D;
244 S31=(A21*A32-A31*A22)*D;
245 S12=(A13*A32-A12*A33)*D;
246 S22=(A11*A33-A31*A13)*D;
247 S32=(A12*A31-A11*A32)*D;
248 S13=(A12*A23-A13*A22)*D;
249 S23=(A13*A21-A11*A23)*D;
250 S33=(A11*A22-A12*A21)*D;
251
252 out->factors[iptr_main*9 ]=S11;
253 out->factors[iptr_main*9+1]=S21;
254 out->factors[iptr_main*9+2]=S31;
255 out->factors[iptr_main*9+3]=S12;
256 out->factors[iptr_main*9+4]=S22;
257 out->factors[iptr_main*9+5]=S32;
258 out->factors[iptr_main*9+6]=S13;
259 out->factors[iptr_main*9+7]=S23;
260 out->factors[iptr_main*9+8]=S33;
261
262 /* a_ik=a_ii^{-1}*a_ik */
263 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
264 k=A->pattern->index[iptr_ik];
265 if (colorOf[k]>color) {
266 A11=out->factors[iptr_ik*9 ];
267 A21=out->factors[iptr_ik*9+1];
268 A31=out->factors[iptr_ik*9+2];
269 A12=out->factors[iptr_ik*9+3];
270 A22=out->factors[iptr_ik*9+4];
271 A32=out->factors[iptr_ik*9+5];
272 A13=out->factors[iptr_ik*9+6];
273 A23=out->factors[iptr_ik*9+7];
274 A33=out->factors[iptr_ik*9+8];
275 out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
276 out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
277 out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
278 out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
279 out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
280 out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
281 out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
282 out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
283 out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
284 }
285 }
286 } else {
287 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
288 }
289 }
290 }
291 } else {
292 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
293 }
294 #pragma omp barrier
295 }
296 time_fac=Paso_timer()-time0;
297 }
298 if (Paso_noError()) {
299 if (verbose) printf("timing: ILU: coloring/elemination : %e sec\n",time_fac);
300 return out;
301 } else {
302 Paso_Solver_ILU_free(out);
303 return NULL;
304 }
305 }
306
307 /**************************************************************/
308
309 /* apply ILU precondition b-> x
310
311 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
312
313 should be called within a parallel region
314 barrier synconization should be performed to make sure that the input vector available
315
316 */
317
318 void Paso_Solver_solveILU(Paso_SparseMatrix * A, Paso_Solver_ILU * ilu, double * x, const double * b) {
319 register dim_t i,k;
320 register index_t color,iptr_ik,iptr_main;
321 register double S1,S2,S3,R1,R2,R3;
322 const dim_t n=A->numRows;
323 const dim_t n_block=A->row_block_size;
324 const index_t* colorOf = Paso_Pattern_borrowColoringPointer(A->pattern);
325 const dim_t num_colors = Paso_Pattern_getNumColors(A->pattern);
326 const index_t *ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A);
327
328
329 /* copy x into b*/
330 #pragma omp parallel for private(i) schedule(static)
331 for (i=0;i<n*n_block;++i) x[i]=b[i];
332 /* forward substitution */
333 for (color=0;color<num_colors;++color) {
334 if (n_block==1) {
335 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
336 for (i = 0; i < n; ++i) {
337 if (colorOf[i]==color) {
338 /* x_i=x_i-a_ik*x_k */
339 S1=x[i];
340 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
341 k=A->pattern->index[iptr_ik];
342 if (colorOf[k]<color) {
343 R1=x[k];
344 S1-=ilu->factors[iptr_ik]*R1;
345 }
346 }
347 iptr_main=ptr_main[i];
348 x[i]=ilu->factors[iptr_main]*S1;
349 }
350 }
351 } else if (n_block==2) {
352 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
353 for (i = 0; i < n; ++i) {
354 if (colorOf[i]==color) {
355 /* x_i=x_i-a_ik*x_k */
356 S1=x[2*i];
357 S2=x[2*i+1];
358 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
359 k=A->pattern->index[iptr_ik];
360 if (colorOf[k]<color) {
361 R1=x[2*k];
362 R2=x[2*k+1];
363 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
364 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
365 }
366 }
367 iptr_main=ptr_main[i];
368 x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
369 x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
370 }
371
372 }
373 } else if (n_block==3) {
374 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
375 for (i = 0; i < n; ++i) {
376 if (colorOf[i]==color) {
377 /* x_i=x_i-a_ik*x_k */
378 S1=x[3*i];
379 S2=x[3*i+1];
380 S3=x[3*i+2];
381 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
382 k=A->pattern->index[iptr_ik];
383 if (colorOf[k]<color) {
384 R1=x[3*k];
385 R2=x[3*k+1];
386 R3=x[3*k+2];
387 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
388 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
389 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
390 }
391 }
392 iptr_main=ptr_main[i];
393 x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
394 x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
395 x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
396 }
397 }
398 }
399 }
400 /* backward substitution */
401 for (color=(num_colors)-1;color>-1;--color) {
402 if (n_block==1) {
403 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
404 for (i = 0; i < n; ++i) {
405 if (colorOf[i]==color) {
406 /* x_i=x_i-a_ik*x_k */
407 S1=x[i];
408 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
409 k=A->pattern->index[iptr_ik];
410 if (colorOf[k]>color) {
411 R1=x[k];
412 S1-=ilu->factors[iptr_ik]*R1;
413 }
414 }
415 x[i]=S1;
416 }
417 }
418 } else if (n_block==2) {
419 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
420 for (i = 0; i < n; ++i) {
421 if (colorOf[i]==color) {
422 /* x_i=x_i-a_ik*x_k */
423 S1=x[2*i];
424 S2=x[2*i+1];
425 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
426 k=A->pattern->index[iptr_ik];
427 if (colorOf[k]>color) {
428 R1=x[2*k];
429 R2=x[2*k+1];
430 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
431 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
432 }
433 }
434 x[2*i]=S1;
435 x[2*i+1]=S2;
436 }
437 }
438 } else if (n_block==3) {
439 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
440 for (i = 0; i < n; ++i) {
441 if (colorOf[i]==color) {
442 /* x_i=x_i-a_ik*x_k */
443 S1=x[3*i ];
444 S2=x[3*i+1];
445 S3=x[3*i+2];
446 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
447 k=A->pattern->index[iptr_ik];
448 if (colorOf[k]>color) {
449 R1=x[3*k];
450 R2=x[3*k+1];
451 R3=x[3*k+2];
452 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
453 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
454 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
455 }
456 }
457 x[3*i]=S1;
458 x[3*i+1]=S2;
459 x[3*i+2]=S3;
460 }
461 }
462 }
463 #pragma omp barrier
464 }
465 return;
466 }
467

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26