/[escript]/trunk/paso/src/RILU.cpp
ViewVC logotype

Contents of /trunk/paso/src/RILU.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4873 - (show annotations)
Wed Apr 16 06:38:51 2014 UTC (5 years, 5 months ago) by caltinay
File size: 14342 byte(s)
whitespace only changes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: RILU preconditioner with reordering */
21
22 /****************************************************************************/
23
24 /* Author: Lutz Gross, l.gross@uq.edu.au */
25
26 /****************************************************************************/
27
28 #include "Paso.h"
29 #include "Preconditioner.h"
30 #include "PasoUtil.h"
31 #include "BlockOps.h"
32
33 namespace paso {
34
35 void Solver_RILU_free(Solver_RILU* in)
36 {
37 if (in!=NULL) {
38 Solver_RILU_free(in->RILU_of_Schur);
39 delete[] in->inv_A_FF;
40 delete[] in->A_FF_pivot;
41 delete[] in->rows_in_F;
42 delete[] in->rows_in_C;
43 delete[] in->mask_F;
44 delete[] in->mask_C;
45 delete[] in->x_F;
46 delete[] in->b_F;
47 delete[] in->x_C;
48 delete[] in->b_C;
49 delete in;
50 }
51 }
52
53 /****************************************************************************/
54
55 /* constructs the block-block factorization of
56
57 [ A_FF A_FC ]
58 A_p=
59 [ A_CF A_FF ]
60
61 to
62
63 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
64 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
65
66
67 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
68
69 then RILU is applied to S again until S becomes empty
70 */
71 Solver_RILU* Solver_getRILU(SparseMatrix_ptr A_p, bool verbose)
72 {
73 Solver_RILU* out=NULL;
74 dim_t n=A_p->numRows;
75 dim_t n_block=A_p->row_block_size;
76 index_t* mis_marker=NULL;
77 index_t* counter=NULL;
78 index_t iPtr,*index, *where_p;
79 dim_t i,k;
80 SparseMatrix_ptr schur;
81 double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0=0,time1=0;/*,time2=0;*/
82
83 mis_marker=new index_t[n];
84 counter=new index_t[n];
85 out=new Solver_RILU;
86 out->RILU_of_Schur=NULL;
87 out->inv_A_FF=NULL;
88 out->A_FF_pivot=NULL;
89 out->rows_in_F=NULL;
90 out->rows_in_C=NULL;
91 out->mask_F=NULL;
92 out->mask_C=NULL;
93 out->x_F=NULL;
94 out->b_F=NULL;
95 out->x_C=NULL;
96 out->b_C=NULL;
97
98 /* identify independent set of rows/columns */
99 time0=Esys_timer();
100 #pragma omp parallel for private(i) schedule(static)
101 for (i=0;i<n;++i) mis_marker[i]=-1;
102 A_p->pattern->mis(mis_marker);
103 /*time2=Esys_timer()-time0;*/
104 if (Esys_noError()) {
105 #pragma omp parallel for private(i) schedule(static)
106 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
107 out->n=n;
108 out->n_block=n_block;
109 out->n_F=util::cumsum(n,counter);
110 out->mask_F=new index_t[n];
111 out->rows_in_F=new index_t[out->n_F];
112 out->inv_A_FF=new double[n_block*n_block*out->n_F];
113 out->A_FF_pivot=NULL; /* later use for block size>3 */
114 #pragma omp parallel
115 {
116 /* creates an index for F from mask */
117 #pragma omp for private(i) schedule(static)
118 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
119 #pragma omp for private(i) schedule(static)
120 for (i = 0; i < n; ++i) {
121 if (mis_marker[i]) {
122 out->rows_in_F[counter[i]]=i;
123 out->mask_F[i]=counter[i];
124 } else {
125 out->mask_F[i]=-1;
126 }
127 }
128 #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
129 for (i = 0; i < out->n_F; i++) {
130 /* find main diagonal */
131 iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
132 index=&(A_p->pattern->index[iPtr]);
133 where_p=(index_t*)bsearch(&out->rows_in_F[i],
134 index,
135 A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
136 sizeof(index_t),
137 util::comparIndex);
138 if (where_p==NULL) {
139 Esys_setError(VALUE_ERROR, "Solver_getRILU: main diagonal element missing.");
140 } else {
141 iPtr+=(index_t)(where_p-index);
142 /* get inverse of A_FF block: */
143 if (n_block==1) {
144 if (ABS(A_p->val[iPtr])>0.) {
145 out->inv_A_FF[i]=1./A_p->val[iPtr];
146 } else {
147 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
148 }
149 } else if (n_block==2) {
150 A11=A_p->val[iPtr*4];
151 A21=A_p->val[iPtr*4+1];
152 A12=A_p->val[iPtr*4+2];
153 A22=A_p->val[iPtr*4+3];
154 D = A11*A22-A12*A21;
155 if (ABS(D) > 0 ){
156 D=1./D;
157 out->inv_A_FF[i*4]= A22*D;
158 out->inv_A_FF[i*4+1]=-A21*D;
159 out->inv_A_FF[i*4+2]=-A12*D;
160 out->inv_A_FF[i*4+3]= A11*D;
161 } else {
162 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
163 }
164 } else if (n_block==3) {
165 A11=A_p->val[iPtr*9 ];
166 A21=A_p->val[iPtr*9+1];
167 A31=A_p->val[iPtr*9+2];
168 A12=A_p->val[iPtr*9+3];
169 A22=A_p->val[iPtr*9+4];
170 A32=A_p->val[iPtr*9+5];
171 A13=A_p->val[iPtr*9+6];
172 A23=A_p->val[iPtr*9+7];
173 A33=A_p->val[iPtr*9+8];
174 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
175 if (ABS(D) > 0 ){
176 D=1./D;
177 out->inv_A_FF[i*9 ]=(A22*A33-A23*A32)*D;
178 out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
179 out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
180 out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
181 out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
182 out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
183 out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
184 out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
185 out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
186 } else {
187 Esys_setError(ZERO_DIVISION_ERROR, "Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
188 }
189 }
190 }
191 }
192 } /* end parallel region */
193
194 if (Esys_noError()) {
195 // if there are no nodes in the coarse level there is no more
196 // work to do
197 out->n_C=n-out->n_F;
198 if (out->n_C > 0) {
199 out->rows_in_C = new index_t[out->n_C];
200 out->mask_C = new index_t[n];
201 /* creates an index for C from mask */
202 #pragma omp parallel for private(i) schedule(static)
203 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
204 util::cumsum(n,counter);
205 #pragma omp parallel
206 {
207 #pragma omp for private(i) schedule(static)
208 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
209 #pragma omp for private(i) schedule(static)
210 for (i = 0; i < n; ++i) {
211 if (! mis_marker[i]) {
212 out->rows_in_C[counter[i]]=i;
213 out->mask_C[i]=counter[i];
214 } else {
215 out->mask_C[i]=-1;
216 }
217 }
218 } /* end parallel region */
219 /* get A_CF block: */
220 out->A_CF=A_p->getSubmatrix(out->n_C, out->n_F, out->rows_in_C, out->mask_F);
221 if (Esys_noError()) {
222 /* get A_FC block: */
223 out->A_FC=A_p->getSubmatrix(out->n_F, out->n_C, out->rows_in_F, out->mask_C);
224 }
225 /* get A_FF block: */
226 if (Esys_noError()) {
227 schur = A_p->getSubmatrix(out->n_C, out->n_C, out->rows_in_C, out->mask_C);
228 }
229 time0=Esys_timer()-time0;
230 if (Esys_noError()) {
231 time1=Esys_timer();
232 /* update A_CC block to get Schur complement and then apply RILU to it */
233 Solver_updateIncompleteSchurComplement(schur, out->A_CF, out->inv_A_FF, out->A_FF_pivot, out->A_FC);
234 time1=Esys_timer()-time1;
235 out->RILU_of_Schur = Solver_getRILU(schur, verbose);
236 schur.reset();
237 }
238 /* allocate work arrays for RILU application */
239 if (Esys_noError()) {
240 out->x_F=new double[n_block*out->n_F];
241 out->b_F=new double[n_block*out->n_F];
242 out->x_C=new double[n_block*out->n_C];
243 out->b_C=new double[n_block*out->n_C];
244 #pragma omp parallel
245 {
246 #pragma omp for private(i,k) schedule(static)
247 for (i = 0; i < out->n_F; ++i) {
248 for (k=0; k<n_block;++k) {
249 out->x_F[i*n_block+k]=0.;
250 out->b_F[i*n_block+k]=0.;
251 }
252 }
253 #pragma omp for private(i,k) schedule(static)
254 for (i = 0; i < out->n_C; ++i) {
255 for (k=0; k<n_block;++k) {
256 out->x_C[i*n_block+k]=0.;
257 out->b_C[i*n_block+k]=0.;
258 }
259 }
260 } // end parallel region
261 }
262 }
263 }
264 }
265 delete[] mis_marker;
266 delete[] counter;
267 if (Esys_noError()) {
268 //if (verbose) {
269 // printf("RILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
270 // if (out->n_C>0) {
271 // printf("timing: RILU: MIS/reordering/elimination : %e/%e/%e\n",time2,time0,time1);
272 // } else {
273 // printf("timing: RILU: MIS: %e\n",time2);
274 // }
275 //}
276 return out;
277 } else {
278 Solver_RILU_free(out);
279 return NULL;
280 }
281 }
282
283 /****************************************************************************/
284
285 /* apply RILU precondition b-> x
286
287 in fact it solves
288
289 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
290 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
291
292 in the form
293
294 b->[b_F,b_C]
295 x_F=invA_FF*b_F
296 b_C=b_C-A_CF*x_F
297 x_C=RILU(b_C)
298 b_F=b_F-A_FC*x_C
299 x_F=invA_FF*b_F
300 x<-[x_F,x_C]
301
302 Should be called within a parallel region.
303 Barrier synchronization should be performed to make sure that the input
304 vector is available.
305 */
306 void Solver_solveRILU(Solver_RILU* rilu, double* x, double* b)
307 {
308 dim_t i,k;
309 dim_t n_block=rilu->n_block;
310
311 if (rilu->n_C==0) {
312 /* x=invA_FF*b */
313 util::copy(n_block*rilu->n_F, x, b);
314 BlockOps_solveAll(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,x);
315 } else {
316 /* b->[b_F,b_C] */
317 if (n_block==1) {
318 #pragma omp parallel for private(i) schedule(static)
319 for (i=0;i<rilu->n_F;++i) rilu->b_F[i]=b[rilu->rows_in_F[i]];
320 #pragma omp parallel for private(i) schedule(static)
321 for (i=0;i<rilu->n_C;++i) rilu->b_C[i]=b[rilu->rows_in_C[i]];
322 } else {
323 #pragma omp parallel for private(i,k) schedule(static)
324 for (i=0; i<rilu->n_F; i++)
325 for (k=0; k<n_block; k++)
326 rilu->b_F[rilu->n_block*i+k]=b[n_block*rilu->rows_in_F[i]+k];
327 #pragma omp parallel for private(i,k) schedule(static)
328 for (i=0; i<rilu->n_C; i++)
329 for (k=0; k<n_block; k++)
330 rilu->b_C[rilu->n_block*i+k]=b[n_block*rilu->rows_in_C[i]+k];
331 }
332 /* x_F=invA_FF*b_F */
333 util::copy(n_block*rilu->n_F, rilu->x_F,rilu->b_F);
334 BlockOps_solveAll(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F);
335 /* b_C=b_C-A_CF*x_F */
336 SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_CF,rilu->x_F,1.,rilu->b_C);
337 /* x_C=RILU(b_C) */
338 Solver_solveRILU(rilu->RILU_of_Schur,rilu->x_C,rilu->b_C);
339 /* b_F=b_F-A_FC*x_C */
340 SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_FC,rilu->x_C,1.,rilu->b_F);
341 /* x_F=invA_FF*b_F */
342 util::copy(n_block*rilu->n_F, rilu->x_F,rilu->b_F);
343 BlockOps_solveAll(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F);
344 /* x<-[x_F,x_C] */
345 if (n_block==1) {
346 #pragma omp parallel for private(i) schedule(static)
347 for (i=0; i<rilu->n; i++) {
348 if (rilu->mask_C[i] > -1) {
349 x[i]=rilu->x_C[rilu->mask_C[i]];
350 } else {
351 x[i]=rilu->x_F[rilu->mask_F[i]];
352 }
353 }
354 } else {
355 #pragma omp parallel for private(i,k) schedule(static)
356 for (i=0; i<rilu->n; i++) {
357 if (rilu->mask_C[i] > -1) {
358 for (k=0; k<n_block; k++)
359 x[n_block*i+k]=rilu->x_C[n_block*rilu->mask_C[i]+k];
360 } else {
361 for (k=0; k<n_block; k++)
362 x[n_block*i+k]=rilu->x_F[n_block*rilu->mask_F[i]+k];
363 }
364 }
365 }
366 }
367 }
368
369 } // namespace paso
370

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/RILU.cpp:3531-3826 /branches/lapack2681/paso/src/RILU.cpp:2682-2741 /branches/pasowrap/paso/src/RILU.cpp:3661-3674 /branches/py3_attempt2/paso/src/RILU.cpp:3871-3891 /branches/restext/paso/src/RILU.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/RILU.cpp:3669-3791 /branches/stage3.0/paso/src/RILU.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/RILU.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/RILU.cpp:3517-3974 /release/3.0/paso/src/RILU.cpp:2591-2601 /trunk/paso/src/RILU.cpp:4257-4344 /trunk/ripley/test/python/paso/src/RILU.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26