/[escript]/trunk/paso/src/Solver_RILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_RILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (hide annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 10 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/Solver_RILU.c
File MIME type: text/plain
File size: 16122 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 gross 430 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 gross 430 /**************************************************************/
16    
17     /* Paso: RILU preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26 robwdcock 682 #include "../Paso.h"
27 gross 430 #include "Solver.h"
28 robwdcock 682 #include "../PasoUtil.h"
29 gross 430
30     /**************************************************************/
31    
32     /* free all memory used by RILU */
33    
34     void Paso_Solver_RILU_free(Paso_Solver_RILU * in) {
35     if (in!=NULL) {
36     Paso_Solver_RILU_free(in->RILU_of_Schur);
37     MEMFREE(in->inv_A_FF);
38     MEMFREE(in->A_FF_pivot);
39     Paso_SystemMatrix_dealloc(in->A_FC);
40     Paso_SystemMatrix_dealloc(in->A_CF);
41     MEMFREE(in->rows_in_F);
42     MEMFREE(in->rows_in_C);
43     MEMFREE(in->mask_F);
44     MEMFREE(in->mask_C);
45     MEMFREE(in->x_F);
46     MEMFREE(in->b_F);
47     MEMFREE(in->x_C);
48     MEMFREE(in->b_C);
49     MEMFREE(in);
50     }
51     }
52    
53     /**************************************************************/
54    
55     /* constructs the block-block factorization of
56    
57     [ A_FF A_FC ]
58     A_p=
59     [ A_CF A_FF ]
60    
61     to
62    
63     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
64     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
65    
66    
67     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
68    
69     then RILU is applied to S again until S becomes empty
70    
71     */
72     Paso_Solver_RILU* Paso_Solver_getRILU(Paso_SystemMatrix * A_p,bool_t verbose) {
73     Paso_Solver_RILU* out=NULL;
74     dim_t n=A_p->num_rows;
75     dim_t n_block=A_p->row_block_size;
76     index_t* mis_marker=NULL;
77     index_t* counter=NULL;
78     index_t iPtr,*index, *where_p;
79     dim_t i,k;
80     Paso_SystemMatrix * schur=NULL;
81     double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
82    
83    
84     /* identify independend set of rows/columns */
85     mis_marker=TMPMEMALLOC(n,index_t);
86     counter=TMPMEMALLOC(n,index_t);
87     out=MEMALLOC(1,Paso_Solver_RILU);
88     out->RILU_of_Schur=NULL;
89     out->inv_A_FF=NULL;
90     out->A_FF_pivot=NULL;
91     out->A_FC=NULL;
92     out->A_CF=NULL;
93     out->rows_in_F=NULL;
94     out->rows_in_C=NULL;
95     out->mask_F=NULL;
96     out->mask_C=NULL;
97     out->x_F=NULL;
98     out->b_F=NULL;
99     out->x_C=NULL;
100     out->b_C=NULL;
101    
102     if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
103     /* identify independend set of rows/columns */
104     time0=Paso_timer();
105 gross 431 #pragma omp parallel for private(i) schedule(static)
106     for (i=0;i<n;++i) mis_marker[i]=-1;
107 gross 430 Paso_SystemMatrixPattern_mis(A_p->pattern,mis_marker);
108     time2=Paso_timer()-time0;
109     if (Paso_noError()) {
110     #pragma omp parallel for private(i) schedule(static)
111     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
112     out->n=n;
113     out->n_block=n_block;
114     out->n_F=Paso_Util_cumsum(n,counter);
115     out->mask_F=MEMALLOC(n,index_t);
116     out->rows_in_F=MEMALLOC(out->n_F,index_t);
117     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
118     out->A_FF_pivot=NULL; /* later use for block size>3 */
119     if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
120     #pragma omp parallel
121     {
122     /* creates an index for F from mask */
123     #pragma omp for private(i) schedule(static)
124     for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
125     #pragma omp for private(i) schedule(static)
126     for (i = 0; i < n; ++i) {
127     if (mis_marker[i]) {
128     out->rows_in_F[counter[i]]=i;
129     out->mask_F[i]=counter[i];
130     } else {
131     out->mask_F[i]=-1;
132     }
133     }
134     #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
135     for (i = 0; i < out->n_F; i++) {
136     /* find main diagonal */
137     iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
138     index=&(A_p->pattern->index[iPtr]);
139     where_p=(index_t*)bsearch(&out->rows_in_F[i],
140     index,
141     A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
142     sizeof(index_t),
143     Paso_comparIndex);
144     if (where_p==NULL) {
145     Paso_setError(VALUE_ERROR, "Paso_Solver_getRILU: main diagonal element missing.");
146     } else {
147     iPtr+=(index_t)(where_p-index);
148     /* get inverse of A_FF block: */
149     if (n_block==1) {
150     if (ABS(A_p->val[iPtr])>0.) {
151     out->inv_A_FF[i]=1./A_p->val[iPtr];
152     } else {
153     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU: Break-down in RILU decomposition: non-regular main diagonal block.");
154     }
155     } else if (n_block==2) {
156     A11=A_p->val[iPtr*4];
157     A21=A_p->val[iPtr*4+1];
158     A12=A_p->val[iPtr*4+2];
159     A22=A_p->val[iPtr*4+3];
160     D = A11*A22-A12*A21;
161     if (ABS(D) > 0 ){
162     D=1./D;
163     out->inv_A_FF[i*4]= A22*D;
164     out->inv_A_FF[i*4+1]=-A21*D;
165     out->inv_A_FF[i*4+2]=-A12*D;
166     out->inv_A_FF[i*4+3]= A11*D;
167     } else {
168     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU:Break-down in RILU decomposition: non-regular main diagonal block.");
169     }
170     } else if (n_block==3) {
171     A11=A_p->val[iPtr*9 ];
172     A21=A_p->val[iPtr*9+1];
173     A31=A_p->val[iPtr*9+2];
174     A12=A_p->val[iPtr*9+3];
175     A22=A_p->val[iPtr*9+4];
176     A32=A_p->val[iPtr*9+5];
177     A13=A_p->val[iPtr*9+6];
178     A23=A_p->val[iPtr*9+7];
179     A33=A_p->val[iPtr*9+8];
180     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
181     if (ABS(D) > 0 ){
182     D=1./D;
183     out->inv_A_FF[i*9 ]=(A22*A33-A23*A32)*D;
184     out->inv_A_FF[i*9+1]=(A31*A23-A21*A33)*D;
185     out->inv_A_FF[i*9+2]=(A21*A32-A31*A22)*D;
186     out->inv_A_FF[i*9+3]=(A13*A32-A12*A33)*D;
187     out->inv_A_FF[i*9+4]=(A11*A33-A31*A13)*D;
188     out->inv_A_FF[i*9+5]=(A12*A31-A11*A32)*D;
189     out->inv_A_FF[i*9+6]=(A12*A23-A13*A22)*D;
190     out->inv_A_FF[i*9+7]=(A13*A21-A11*A23)*D;
191     out->inv_A_FF[i*9+8]=(A11*A22-A12*A21)*D;
192     } else {
193     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getRILU:Break-down in RILU decomposition: non-regular main diagonal block.");
194     }
195     }
196     }
197     }
198     } /* end parallel region */
199    
200     if( Paso_noError()) {
201     /* if there are no nodes in the coarse level there is no more work to do */
202     out->n_C=n-out->n_F;
203     if (out->n_C>0) {
204     out->rows_in_C=MEMALLOC(out->n_C,index_t);
205     out->mask_C=MEMALLOC(n,index_t);
206     if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
207     /* creates an index for C from mask */
208     #pragma omp parallel for private(i) schedule(static)
209     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
210     Paso_Util_cumsum(n,counter);
211     #pragma omp parallel
212     {
213     #pragma omp for private(i) schedule(static)
214     for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
215     #pragma omp for private(i) schedule(static)
216     for (i = 0; i < n; ++i) {
217     if (! mis_marker[i]) {
218     out->rows_in_C[counter[i]]=i;
219     out->mask_C[i]=counter[i];
220     } else {
221     out->mask_C[i]=-1;
222     }
223     }
224     } /* end parallel region */
225     /* get A_CF block: */
226     out->A_CF=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_F);
227     if (Paso_noError()) {
228     /* get A_FC block: */
229     out->A_FC=Paso_SystemMatrix_getSubmatrix(A_p,out->n_F,out->rows_in_F,out->mask_C);
230     /* get A_FF block: */
231     if (Paso_noError()) {
232     schur=Paso_SystemMatrix_getSubmatrix(A_p,out->n_C,out->rows_in_C,out->mask_C);
233     time0=Paso_timer()-time0;
234     if (Paso_noError()) {
235     time1=Paso_timer();
236     /* update A_CC block to get Schur complement and then apply RILU to it */
237     Paso_Solver_updateIncompleteSchurComplement(schur,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
238     time1=Paso_timer()-time1;
239     out->RILU_of_Schur=Paso_Solver_getRILU(schur,verbose);
240     Paso_SystemMatrix_dealloc(schur);
241     }
242     /* allocate work arrays for RILU application */
243     if (Paso_noError()) {
244     out->x_F=MEMALLOC(n_block*out->n_F,double);
245     out->b_F=MEMALLOC(n_block*out->n_F,double);
246     out->x_C=MEMALLOC(n_block*out->n_C,double);
247     out->b_C=MEMALLOC(n_block*out->n_C,double);
248     if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
249     #pragma omp parallel
250     {
251     #pragma omp for private(i,k) schedule(static)
252     for (i = 0; i < out->n_F; ++i) {
253     for (k=0; k<n_block;++k) {
254     out->x_F[i*n_block+k]=0.;
255     out->b_F[i*n_block+k]=0.;
256     }
257     }
258     #pragma omp for private(i,k) schedule(static)
259     for (i = 0; i < out->n_C; ++i) {
260     for (k=0; k<n_block;++k) {
261     out->x_C[i*n_block+k]=0.;
262     out->b_C[i*n_block+k]=0.;
263     }
264     }
265     } /* end parallel region */
266     }
267     }
268     }
269     }
270     }
271     }
272     }
273     }
274     }
275     }
276     TMPMEMFREE(mis_marker);
277     TMPMEMFREE(counter);
278     if (Paso_noError()) {
279     if (verbose) {
280     printf("RILU: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
281     if (out->n_C>0) {
282     printf("timing: RILU: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
283     } else {
284     printf("timing: RILU: MIS: %e\n",time2);
285     }
286     }
287     return out;
288     } else {
289     Paso_Solver_RILU_free(out);
290     return NULL;
291     }
292     }
293    
294     /**************************************************************/
295    
296     /* apply RILU precondition b-> x
297    
298     in fact it solves
299    
300     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
301     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
302    
303     in the form
304    
305     b->[b_F,b_C]
306     x_F=invA_FF*b_F
307     b_C=b_C-A_CF*x_F
308     x_C=RILU(b_C)
309     b_F=b_F-A_FC*x_C
310     x_F=invA_FF*b_F
311     x<-[x_F,x_C]
312    
313     should be called within a parallel region
314     barrier synconization should be performed to make sure that the input vector available
315    
316     */
317    
318     void Paso_Solver_solveRILU(Paso_Solver_RILU * rilu, double * x, double * b) {
319     dim_t i,k;
320     dim_t n_block=rilu->n_block;
321    
322     if (rilu->n_C==0) {
323     /* x=invA_FF*b */
324     Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,x,b);
325     } else {
326     /* b->[b_F,b_C] */
327     if (n_block==1) {
328     #pragma omp for private(i) schedule(static)
329     for (i=0;i<rilu->n_F;++i) rilu->b_F[i]=b[rilu->rows_in_F[i]];
330     #pragma omp for private(i) schedule(static)
331     for (i=0;i<rilu->n_C;++i) rilu->b_C[i]=b[rilu->rows_in_C[i]];
332     } else {
333     #pragma omp for private(i,k) schedule(static)
334     for (i=0;i<rilu->n_F;++i)
335     for (k=0;k<n_block;k++) rilu->b_F[rilu->n_block*i+k]=b[n_block*rilu->rows_in_F[i]+k];
336     #pragma omp for private(i,k) schedule(static)
337     for (i=0;i<rilu->n_C;++i)
338     for (k=0;k<n_block;k++) rilu->b_C[rilu->n_block*i+k]=b[n_block*rilu->rows_in_C[i]+k];
339     }
340     /* x_F=invA_FF*b_F */
341     Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F,rilu->b_F);
342     /* b_C=b_C-A_CF*x_F */
343     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_CF,rilu->x_F,1.,rilu->b_C);
344     /* x_C=RILU(b_C) */
345     Paso_Solver_solveRILU(rilu->RILU_of_Schur,rilu->x_C,rilu->b_C);
346     /* b_F=b_F-A_FC*x_C */
347     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,rilu->A_FC,rilu->x_C,1.,rilu->b_F);
348     /* x_F=invA_FF*b_F */
349     Paso_Solver_applyBlockDiagonalMatrix(n_block,rilu->n_F,rilu->inv_A_FF,rilu->A_FF_pivot,rilu->x_F,rilu->b_F);
350     /* x<-[x_F,x_C] */
351     if (n_block==1) {
352     #pragma omp for private(i) schedule(static)
353     for (i=0;i<rilu->n;++i) {
354     if (rilu->mask_C[i]>-1) {
355     x[i]=rilu->x_C[rilu->mask_C[i]];
356     } else {
357     x[i]=rilu->x_F[rilu->mask_F[i]];
358     }
359     }
360     } else {
361     #pragma omp for private(i,k) schedule(static)
362     for (i=0;i<rilu->n;++i) {
363     if (rilu->mask_C[i]>-1) {
364     for (k=0;k<n_block;k++) x[n_block*i+k]=rilu->x_C[n_block*rilu->mask_C[i]+k];
365     } else {
366     for (k=0;k<n_block;k++) x[n_block*i+k]=rilu->x_F[n_block*rilu->mask_F[i]+k];
367     }
368     }
369     }
370     /* all done */
371     }
372     return;
373     }
374    
375     /*
376     * $Log$
377     *
378     */

  ViewVC Help
Powered by ViewVC 1.1.26