/[escript]/trunk/paso/src/Solver_RILU.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_RILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (show annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 10 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/Solver_RILU.c
File MIME type: text/plain
File size: 16122 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /**************************************************************/
16
17 /* Paso: RILU preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "../Paso.h"
27 #include "Solver.h"
28 #include "../PasoUtil.h"
29
30 /**************************************************************/
31
32 /* free all memory used by RILU */
33
34 void Paso_Solver_RILU_free(Paso_Solver_RILU * in) {
35 if (in!=NULL) {
36 Paso_Solver_RILU_free(in->RILU_of_Schur);
37 MEMFREE(in->inv_A_FF);
38 MEMFREE(in->A_FF_pivot);
39 Paso_SystemMatrix_dealloc(in->A_FC);
40 Paso_SystemMatrix_dealloc(in->A_CF);
41 MEMFREE(in->rows_in_F);
42 MEMFREE(in->rows_in_C);
43 MEMFREE(in->mask_F);
44 MEMFREE(in->mask_C);
45 MEMFREE(in->x_F);
46 MEMFREE(in->b_F);
47 MEMFREE(in->x_C);
48 MEMFREE(in->b_C);
49 MEMFREE(in);
50 }
51 }
52
53 /**************************************************************/
54
55 /* constructs the block-block factorization of
56
57 [ A_FF A_FC ]
58 A_p=
59 [ A_CF A_FF ]
60
61 to
62
63 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
64 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
65
66
67 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
68
69 then RILU is applied to S again until S becomes empty
70
71 */
72 Paso_Solver_RILU* Paso_Solver_getRILU(Paso_SystemMatrix * A_p,bool_t verbose) {
73 Paso_Solver_RILU* out=NULL;
74 dim_t n=A_p->num_rows;
75 dim_t n_block=A_p->row_block_size;
76 index_t* mis_marker=NULL;
77 index_t* counter=NULL;
78 index_t iPtr,*index, *where_p;
79 dim_t i,k;
80 Paso_SystemMatrix * schur=NULL;
81 double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
82
83
84 /* identify independend set of rows/columns */
85 mis_marker=TMPMEMALLOC(n,index_t);
86 counter=TMPMEMALLOC(n,index_t);
87 out=MEMALLOC(1,Paso_Solver_RILU);
88 out->RILU_of_Schur=NULL;
89 out->inv_A_FF=NULL;
90 out->A_FF_pivot=NULL;
91 out->A_FC=NULL;
92 out->A_CF=NULL;
93 out->rows_in_F=NULL;
94 out->rows_in_C=NULL;
95 out->mask_F=NULL;
96 out->mask_C=NULL;
97 out->x_F=NULL;
98 out->b_F=NULL;
99 out->x_C=NULL;
100 out->b_C=NULL;
101
102 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
103 /* identify independend set of rows/columns */
104 time0=Paso_timer();
105 #pragma omp parallel for private(i) schedule(static)
106 for (i=0;i<n;++i) mis_marker[i]=-1;
107 Paso_SystemMatrixPattern_mis(A_p->pattern,mis_marker);
108 time2=Paso_timer()-time0;
109 if (Paso_noError()) {
110 #pragma omp parallel for private(i) schedule(static)
111 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
112 out->n=n;
113 out->n_block=n_block;
114 out->n_F=Paso_Util_cumsum(n,counter);
115 out->mask_F=MEMALLOC(n,index_t);
116 out->rows_in_F=MEMALLOC(out->n_F,index_t);
117 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
118 out->A_FF_pivot=NULL; /* later use for block size>3 */
119 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
120 #pragma omp parallel
121 {
122 /* creates an index for F from mask */
123 #pragma omp for private(i) schedule(static)
124 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
125 #pragma omp for private(i) schedule(static)
126 for (i = 0; i < n; ++i) {
127 if (mis_marker[i]) {
128 out->rows_in_F[counter[i]]=i;
129 out->mask_F[i]=counter[i];
130 } else {
131 out->mask_F[i]=-1;
132 }
133 }
134 #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
135 for (i = 0; i < out->n_F; i++) {
136 /* find main diagonal */
137 iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
138 index=&(A_p->pattern->index[iPtr]);
139 where_p=(index_t*)bsearch(&out->rows_in_F[i],
140 index,
141 A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
142 sizeof(index_t),
143 Paso_comparIndex);
144 if (where_p==NULL) {
145 Paso_setError(VALUE_ERROR, "Paso_Solver_getRILU: main diagonal element missing.");
146 } else {
147 iPtr+=(index_t)(where_p-index);
148 /* get inverse of A_FF block: */
149 if (n_block==1) {
150 if (ABS(A_p->val[iPtr])>0.) {
151 out->inv_A_FF[i]=1./A_p->val[iPtr];
152 } else {
153 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
154 }
155 } else if (n_block==2) {
156 A11=A_p->val[iPtr*4];
157 A21=A_p->val[iPtr*4+1];
158 A12=A_p->val[iPtr*4+2];
159 A22=A_p->val[iPtr*4+3];
160 D = A11*A22-A12*A21;
161 if (ABS(D) > 0 ){
162 D=1./D;
163 out->inv_A_FF[i*4]= A22*D;
164 out->inv_A_FF[i*4+1]=-A21*D;
165 out->inv_A_FF[i*4+2]=-A12*D;
166 out->inv_A_FF[i*4+3]= A11*D;
167 } else {
168 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU:Break-down in RILU decomposition: non-regular main diagonal block.");
169 }
170 } else if (n_block==3) {
171 A11=A_p->val[iPtr*9 ];
172 A21=A_p->val[iPtr*9+1];
173 A31=A_p->val[iPtr*9+2];
174 A12=A_p->val[iPtr*9+3];
175 A22=A_p->val[iPtr*9+4];
176 A32=A_p->val[iPtr*9+5];
177 A13=A_p->val[iPtr*9+6];
178 A23=A_p->val[iPtr*9+7];
179 A33=A_p->val[iPtr*9+8];
180 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
181 if (ABS(D) > 0 ){
182 D=1./D;
183 out->inv_A_FF[i*9 ]=(A22*A33-A23*A32)*D;
184 out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
185 out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
186 out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
187 out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
188 out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
189 out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
190 out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
191 out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
192 } else {
193 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU:Break-down in RILU decomposition: non-regular main diagonal block.");
194 }
195 }
196 }
197 }
198 } /* end parallel region */
199
200 if( Paso_noError()) {
201 /* if there are no nodes in the coarse level there is no more work to do */
202 out->n_C=n-out->n_F;
203 if (out->n_C>0) {
204 out->rows_in_C=MEMALLOC(out->n_C,index_t);
205 out->mask_C=MEMALLOC(n,index_t);
206 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
207 /* creates an index for C from mask */
208 #pragma omp parallel for private(i) schedule(static)
209 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
210 Paso_Util_cumsum(n,counter);
211 #pragma omp parallel
212 {
213 #pragma omp for private(i) schedule(static)
214 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
215 #pragma omp for private(i) schedule(static)
216 for (i = 0; i < n; ++i) {
217 if (! mis_marker[i]) {
218 out->rows_in_C[counter[i]]=i;
219 out->mask_C[i]=counter[i];
220 } else {
221 out->mask_C[i]=-1;
222 }
223 }
224 } /* end parallel region */
225 /* get A_CF block: */
226 out->A_CF=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_F);
227 if (Paso_noError()) {
228 /* get A_FC block: */
229 out->A_FC=Paso_SystemMatrix_getSubmatrix(A_p,out->n_F,out->rows_in_F,out->mask_C);
230 /* get A_FF block: */
231 if (Paso_noError()) {
232 schur=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_C);
233 time0=Paso_timer()-time0;
234 if (Paso_noError()) {
235 time1=Paso_timer();
236 /* update A_CC block to get Schur complement and then apply RILU to it */
237 Paso_Solver_updateIncompleteSchurComplement(schur,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
238 time1=Paso_timer()-time1;
239 out->RILU_of_Schur=Paso_Solver_getRILU(schur,verbose);
240 Paso_SystemMatrix_dealloc(schur);
241 }
242 /* allocate work arrays for RILU application */
243 if (Paso_noError()) {
244 out->x_F=MEMALLOC(n_block*out->n_F,double);
245 out->b_F=MEMALLOC(n_block*out->n_F,double);
246 out->x_C=MEMALLOC(n_block*out->n_C,double);
247 out->b_C=MEMALLOC(n_block*out->n_C,double);
248 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
249 #pragma omp parallel
250 {
251 #pragma omp for private(i,k) schedule(static)
252 for (i = 0; i < out->n_F; ++i) {
253 for (k=0; k<n_block;++k) {
254 out->x_F[i*n_block+k]=0.;
255 out->b_F[i*n_block+k]=0.;
256 }
257 }
258 #pragma omp for private(i,k) schedule(static)
259 for (i = 0; i < out->n_C; ++i) {
260 for (k=0; k<n_block;++k) {
261 out->x_C[i*n_block+k]=0.;
262 out->b_C[i*n_block+k]=0.;
263 }
264 }
265 } /* end parallel region */
266 }
267 }
268 }
269 }
270 }
271 }
272 }
273 }
274 }
275 }
276 TMPMEMFREE(mis_marker);
277 TMPMEMFREE(counter);
278 if (Paso_noError()) {
279 if (verbose) {
280 printf("RILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
281 if (out->n_C>0) {
282 printf("timing: RILU: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
283 } else {
284 printf("timing: RILU: MIS: %e\n",time2);
285 }
286 }
287 return out;
288 } else {
289 Paso_Solver_RILU_free(out);
290 return NULL;
291 }
292 }
293
294 /**************************************************************/
295
296 /* apply RILU precondition b-> x
297
298 in fact it solves
299
300 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
301 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
302
303 in the form
304
305 b->[b_F,b_C]
306 x_F=invA_FF*b_F
307 b_C=b_C-A_CF*x_F
308 x_C=RILU(b_C)
309 b_F=b_F-A_FC*x_C
310 x_F=invA_FF*b_F
311 x<-[x_F,x_C]
312
313 should be called within a parallel region
314 barrier synconization should be performed to make sure that the input vector available
315
316 */
317
318 void Paso_Solver_solveRILU(Paso_Solver_RILU * rilu, double * x, double * b) {
319 dim_t i,k;
320 dim_t n_block=rilu->n_block;
321
322 if (rilu->n_C==0) {
323 /* x=invA_FF*b */
324 Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,x,b);
325 } else {
326 /* b->[b_F,b_C] */
327 if (n_block==1) {
328 #pragma omp for private(i) schedule(static)
329 for (i=0;i<rilu->n_F;++i) rilu->b_F[i]=b[rilu->rows_in_F[i]];
330 #pragma omp for private(i) schedule(static)
331 for (i=0;i<rilu->n_C;++i) rilu->b_C[i]=b[rilu->rows_in_C[i]];
332 } else {
333 #pragma omp for private(i,k) schedule(static)
334 for (i=0;i<rilu->n_F;++i)
335 for (k=0;k<n_block;k++) rilu->b_F[rilu->n_block*i+k]=b[n_block*rilu->rows_in_F[i]+k];
336 #pragma omp for private(i,k) schedule(static)
337 for (i=0;i<rilu->n_C;++i)
338 for (k=0;k<n_block;k++) rilu->b_C[rilu->n_block*i+k]=b[n_block*rilu->rows_in_C[i]+k];
339 }
340 /* x_F=invA_FF*b_F */
341 Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F,rilu->b_F);
342 /* b_C=b_C-A_CF*x_F */
343 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_CF,rilu->x_F,1.,rilu->b_C);
344 /* x_C=RILU(b_C) */
345 Paso_Solver_solveRILU(rilu->RILU_of_Schur,rilu->x_C,rilu->b_C);
346 /* b_F=b_F-A_FC*x_C */
347 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_FC,rilu->x_C,1.,rilu->b_F);
348 /* x_F=invA_FF*b_F */
349 Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F,rilu->b_F);
350 /* x<-[x_F,x_C] */
351 if (n_block==1) {
352 #pragma omp for private(i) schedule(static)
353 for (i=0;i<rilu->n;++i) {
354 if (rilu->mask_C[i]>-1) {
355 x[i]=rilu->x_C[rilu->mask_C[i]];
356 } else {
357 x[i]=rilu->x_F[rilu->mask_F[i]];
358 }
359 }
360 } else {
361 #pragma omp for private(i,k) schedule(static)
362 for (i=0;i<rilu->n;++i) {
363 if (rilu->mask_C[i]>-1) {
364 for (k=0;k<n_block;k++) x[n_block*i+k]=rilu->x_C[n_block*rilu->mask_C[i]+k];
365 } else {
366 for (k=0;k<n_block;k++) x[n_block*i+k]=rilu->x_F[n_block*rilu->mask_F[i]+k];
367 }
368 }
369 }
370 /* all done */
371 }
372 return;
373 }
374
375 /*
376 * $Log$
377 *
378 */

  ViewVC Help
Powered by ViewVC 1.1.26