/[escript]/trunk/paso/src/ILU.cpp
ViewVC logotype

Contents of /trunk/paso/src/ILU.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4843 - (show annotations)
Tue Apr 8 05:32:07 2014 UTC (5 years, 5 months ago) by caltinay
File size: 23738 byte(s)
checkpointing paso::Preconditioner.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: ILU preconditioner with reordering */
21
22 /****************************************************************************/
23
24 /* Copyrights by ACcESS Australia 2003,2004,2005 */
25 /* Author: Lutz Gross, l.gross@uq.edu.au */
26
27 /****************************************************************************/
28
29 #include "Paso.h"
30 #include "Preconditioner.h"
31 #include "PasoUtil.h"
32
33 namespace paso {
34
35 void Solver_ILU_free(Solver_ILU * in)
36 {
37 if (in!=NULL) {
38 delete[] in->factors;
39 delete in;
40 }
41 }
42
43 /// constructs the incomplete block factorization
44 Solver_ILU* Solver_getILU(SparseMatrix_ptr A, bool verbose)
45 {
46 const dim_t n=A->numRows;
47 const dim_t n_block=A->row_block_size;
48 const index_t* colorOf = A->pattern->borrowColoringPointer();
49 const dim_t num_colors = A->pattern->getNumColors();
50 const index_t *ptr_main = A->borrowMainDiagonalPointer();
51 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
52 register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
53 register index_t i,iptr_main,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2, iptr;
54 double time0=0,time_fac=0;
55 /* allocations: */
56 Solver_ILU* out=new Solver_ILU;
57 out->factors=new double[A->len];
58
59 if ( ! Esys_checkPtr(out->factors) ) {
60
61 time0=Esys_timer();
62
63 #pragma omp parallel for schedule(static) private(i,iptr,k)
64 for (i = 0; i < n; ++i) {
65 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
66 for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
67 }
68 }
69 /* start factorization */
70 for (color=0;color<num_colors && Esys_noError();++color) {
71 if (n_block==1) {
72 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
73 for (i = 0; i < n; ++i) {
74 if (colorOf[i]==color) {
75 for (color2=0;color2<color;++color2) {
76 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
77 k=A->pattern->index[iptr_ik];
78 if (colorOf[k]==color2) {
79 A11=out->factors[iptr_ik];
80 /* a_ij=a_ij-a_ik*a_kj */
81 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
82 j=A->pattern->index[iptr_kj];
83 if (colorOf[j]>color2) {
84 S11=out->factors[iptr_kj];
85 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
86 if (j==A->pattern->index[iptr_ij]) {
87 out->factors[iptr_ij]-=A11*S11;
88 break;
89 }
90 }
91 }
92 }
93 }
94 }
95 }
96 iptr_main=ptr_main[i];
97 D=out->factors[iptr_main];
98 if (ABS(D)>0.) {
99 D=1./D;
100 out->factors[iptr_main]=D;
101 /* a_ik=a_ii^{-1}*a_ik */
102 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
103 k=A->pattern->index[iptr_ik];
104 if (colorOf[k]>color) {
105 A11=out->factors[iptr_ik];
106 out->factors[iptr_ik]=A11*D;
107 }
108 }
109 } else {
110 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
111 }
112 }
113 }
114 } else if (n_block==2) {
115 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
116 for (i = 0; i < n; ++i) {
117 if (colorOf[i]==color) {
118 for (color2=0;color2<color;++color2) {
119 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
120 k=A->pattern->index[iptr_ik];
121 if (colorOf[k]==color2) {
122 A11=out->factors[iptr_ik*4 ];
123 A21=out->factors[iptr_ik*4+1];
124 A12=out->factors[iptr_ik*4+2];
125 A22=out->factors[iptr_ik*4+3];
126 /* a_ij=a_ij-a_ik*a_kj */
127 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
128 j=A->pattern->index[iptr_kj];
129 if (colorOf[j]>color2) {
130 S11=out->factors[iptr_kj*4];
131 S21=out->factors[iptr_kj*4+1];
132 S12=out->factors[iptr_kj*4+2];
133 S22=out->factors[iptr_kj*4+3];
134 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
135 if (j==A->pattern->index[iptr_ij]) {
136 out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
137 out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
138 out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
139 out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
140 break;
141 }
142 }
143 }
144 }
145 }
146 }
147 }
148 iptr_main=ptr_main[i];
149 A11=out->factors[iptr_main*4];
150 A21=out->factors[iptr_main*4+1];
151 A12=out->factors[iptr_main*4+2];
152 A22=out->factors[iptr_main*4+3];
153 D = A11*A22-A12*A21;
154 if (ABS(D)>0.) {
155 D=1./D;
156 S11= A22*D;
157 S21=-A21*D;
158 S12=-A12*D;
159 S22= A11*D;
160 out->factors[iptr_main*4] = S11;
161 out->factors[iptr_main*4+1]= S21;
162 out->factors[iptr_main*4+2]= S12;
163 out->factors[iptr_main*4+3]= S22;
164 /* a_ik=a_ii^{-1}*a_ik */
165 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
166 k=A->pattern->index[iptr_ik];
167 if (colorOf[k]>color) {
168 A11=out->factors[iptr_ik*4 ];
169 A21=out->factors[iptr_ik*4+1];
170 A12=out->factors[iptr_ik*4+2];
171 A22=out->factors[iptr_ik*4+3];
172 out->factors[4*iptr_ik ]=S11*A11+S12*A21;
173 out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
174 out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
175 out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
176 }
177 }
178 } else {
179 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
180 }
181 }
182 }
183 } else if (n_block==3) {
184 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
185 for (i = 0; i < n; ++i) {
186 if (colorOf[i]==color) {
187 for (color2=0;color2<color;++color2) {
188 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
189 k=A->pattern->index[iptr_ik];
190 if (colorOf[k]==color2) {
191 A11=out->factors[iptr_ik*9 ];
192 A21=out->factors[iptr_ik*9+1];
193 A31=out->factors[iptr_ik*9+2];
194 A12=out->factors[iptr_ik*9+3];
195 A22=out->factors[iptr_ik*9+4];
196 A32=out->factors[iptr_ik*9+5];
197 A13=out->factors[iptr_ik*9+6];
198 A23=out->factors[iptr_ik*9+7];
199 A33=out->factors[iptr_ik*9+8];
200 /* a_ij=a_ij-a_ik*a_kj */
201 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
202 j=A->pattern->index[iptr_kj];
203 if (colorOf[j]>color2) {
204 S11=out->factors[iptr_kj*9 ];
205 S21=out->factors[iptr_kj*9+1];
206 S31=out->factors[iptr_kj*9+2];
207 S12=out->factors[iptr_kj*9+3];
208 S22=out->factors[iptr_kj*9+4];
209 S32=out->factors[iptr_kj*9+5];
210 S13=out->factors[iptr_kj*9+6];
211 S23=out->factors[iptr_kj*9+7];
212 S33=out->factors[iptr_kj*9+8];
213 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
214 if (j==A->pattern->index[iptr_ij]) {
215 out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
216 out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
217 out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
218 out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
219 out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
220 out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
221 out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
222 out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
223 out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
224 break;
225 }
226 }
227 }
228 }
229 }
230 }
231 }
232 iptr_main=ptr_main[i];
233 A11=out->factors[iptr_main*9 ];
234 A21=out->factors[iptr_main*9+1];
235 A31=out->factors[iptr_main*9+2];
236 A12=out->factors[iptr_main*9+3];
237 A22=out->factors[iptr_main*9+4];
238 A32=out->factors[iptr_main*9+5];
239 A13=out->factors[iptr_main*9+6];
240 A23=out->factors[iptr_main*9+7];
241 A33=out->factors[iptr_main*9+8];
242 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
243 if (ABS(D)>0.) {
244 D=1./D;
245 S11=(A22*A33-A23*A32)*D;
246 S21=(A31*A23-A21*A33)*D;
247 S31=(A21*A32-A31*A22)*D;
248 S12=(A13*A32-A12*A33)*D;
249 S22=(A11*A33-A31*A13)*D;
250 S32=(A12*A31-A11*A32)*D;
251 S13=(A12*A23-A13*A22)*D;
252 S23=(A13*A21-A11*A23)*D;
253 S33=(A11*A22-A12*A21)*D;
254
255 out->factors[iptr_main*9 ]=S11;
256 out->factors[iptr_main*9+1]=S21;
257 out->factors[iptr_main*9+2]=S31;
258 out->factors[iptr_main*9+3]=S12;
259 out->factors[iptr_main*9+4]=S22;
260 out->factors[iptr_main*9+5]=S32;
261 out->factors[iptr_main*9+6]=S13;
262 out->factors[iptr_main*9+7]=S23;
263 out->factors[iptr_main*9+8]=S33;
264
265 /* a_ik=a_ii^{-1}*a_ik */
266 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
267 k=A->pattern->index[iptr_ik];
268 if (colorOf[k]>color) {
269 A11=out->factors[iptr_ik*9 ];
270 A21=out->factors[iptr_ik*9+1];
271 A31=out->factors[iptr_ik*9+2];
272 A12=out->factors[iptr_ik*9+3];
273 A22=out->factors[iptr_ik*9+4];
274 A32=out->factors[iptr_ik*9+5];
275 A13=out->factors[iptr_ik*9+6];
276 A23=out->factors[iptr_ik*9+7];
277 A33=out->factors[iptr_ik*9+8];
278 out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
279 out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
280 out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
281 out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
282 out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
283 out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
284 out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
285 out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
286 out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
287 }
288 }
289 } else {
290 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getILU: non-regular main diagonal block.");
291 }
292 }
293 }
294 } else {
295 Esys_setError(VALUE_ERROR, "Solver_getILU: block size greater than 3 is not supported.");
296 }
297 #pragma omp barrier
298 }
299 time_fac=Esys_timer()-time0;
300 }
301 if (Esys_noError()) {
302 if (verbose) printf("timing: ILU: coloring/elimination: %e sec\n",time_fac);
303 return out;
304 } else {
305 Solver_ILU_free(out);
306 return NULL;
307 }
308 }
309
310 /************************************************************************************/
311
312 /* Applies ILU precondition b-> x
313
314 In fact it solves LUx=b in the form x= U^{-1} L^{-1}b
315
316 Should be called within a parallel region.
317 Barrier synchronization should be performed to make sure that the input
318 vector is available.
319 */
320
321 void Solver_solveILU(SparseMatrix_ptr A, Solver_ILU* ilu, double* x,
322 const double* b)
323 {
324 register dim_t i,k;
325 register index_t color,iptr_ik,iptr_main;
326 register double S1,S2,S3,R1,R2,R3;
327 const dim_t n=A->numRows;
328 const dim_t n_block=A->row_block_size;
329 const index_t* colorOf = A->pattern->borrowColoringPointer();
330 const dim_t num_colors = A->pattern->getNumColors();
331 const index_t *ptr_main = A->borrowMainDiagonalPointer();
332
333 /* copy x into b */
334 #pragma omp parallel for private(i) schedule(static)
335 for (i=0;i<n*n_block;++i) x[i]=b[i];
336 /* forward substitution */
337 for (color=0;color<num_colors;++color) {
338 if (n_block==1) {
339 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
340 for (i = 0; i < n; ++i) {
341 if (colorOf[i]==color) {
342 /* x_i=x_i-a_ik*x_k */
343 S1=x[i];
344 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
345 k=A->pattern->index[iptr_ik];
346 if (colorOf[k]<color) {
347 R1=x[k];
348 S1-=ilu->factors[iptr_ik]*R1;
349 }
350 }
351 iptr_main=ptr_main[i];
352 x[i]=ilu->factors[iptr_main]*S1;
353 }
354 }
355 } else if (n_block==2) {
356 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
357 for (i = 0; i < n; ++i) {
358 if (colorOf[i]==color) {
359 /* x_i=x_i-a_ik*x_k */
360 S1=x[2*i];
361 S2=x[2*i+1];
362 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
363 k=A->pattern->index[iptr_ik];
364 if (colorOf[k]<color) {
365 R1=x[2*k];
366 R2=x[2*k+1];
367 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
368 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
369 }
370 }
371 iptr_main=ptr_main[i];
372 x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
373 x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
374 }
375
376 }
377 } else if (n_block==3) {
378 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
379 for (i = 0; i < n; ++i) {
380 if (colorOf[i]==color) {
381 /* x_i=x_i-a_ik*x_k */
382 S1=x[3*i];
383 S2=x[3*i+1];
384 S3=x[3*i+2];
385 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
386 k=A->pattern->index[iptr_ik];
387 if (colorOf[k]<color) {
388 R1=x[3*k];
389 R2=x[3*k+1];
390 R3=x[3*k+2];
391 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
392 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
393 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
394 }
395 }
396 iptr_main=ptr_main[i];
397 x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
398 x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
399 x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
400 }
401 }
402 }
403 }
404 /* backward substitution */
405 for (color=(num_colors)-1;color>-1;--color) {
406 if (n_block==1) {
407 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
408 for (i = 0; i < n; ++i) {
409 if (colorOf[i]==color) {
410 /* x_i=x_i-a_ik*x_k */
411 S1=x[i];
412 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
413 k=A->pattern->index[iptr_ik];
414 if (colorOf[k]>color) {
415 R1=x[k];
416 S1-=ilu->factors[iptr_ik]*R1;
417 }
418 }
419 x[i]=S1;
420 }
421 }
422 } else if (n_block==2) {
423 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
424 for (i = 0; i < n; ++i) {
425 if (colorOf[i]==color) {
426 /* x_i=x_i-a_ik*x_k */
427 S1=x[2*i];
428 S2=x[2*i+1];
429 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
430 k=A->pattern->index[iptr_ik];
431 if (colorOf[k]>color) {
432 R1=x[2*k];
433 R2=x[2*k+1];
434 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
435 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
436 }
437 }
438 x[2*i]=S1;
439 x[2*i+1]=S2;
440 }
441 }
442 } else if (n_block==3) {
443 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
444 for (i = 0; i < n; ++i) {
445 if (colorOf[i]==color) {
446 /* x_i=x_i-a_ik*x_k */
447 S1=x[3*i ];
448 S2=x[3*i+1];
449 S3=x[3*i+2];
450 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
451 k=A->pattern->index[iptr_ik];
452 if (colorOf[k]>color) {
453 R1=x[3*k];
454 R2=x[3*k+1];
455 R3=x[3*k+2];
456 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
457 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
458 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
459 }
460 }
461 x[3*i]=S1;
462 x[3*i+1]=S2;
463 x[3*i+2]=S3;
464 }
465 }
466 }
467 #pragma omp barrier
468 }
469 }
470
471 } // namespace paso
472

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
svn:mergeinfo /branches/amg_from_3530/paso/src/ILU.cpp:3531-3826 /branches/lapack2681/paso/src/ILU.cpp:2682-2741 /branches/pasowrap/paso/src/ILU.cpp:3661-3674 /branches/py3_attempt2/paso/src/ILU.cpp:3871-3891 /branches/restext/paso/src/ILU.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/ILU.cpp:3669-3791 /branches/stage3.0/paso/src/ILU.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/ILU.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/ILU.cpp:3517-3974 /release/3.0/paso/src/ILU.cpp:2591-2601 /trunk/paso/src/ILU.cpp:4257-4344 /trunk/ripley/test/python/paso/src/ILU.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26