/[escript]/trunk/paso/src/Solver_GS.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_GS.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1819 - (hide annotations)
Tue Sep 30 05:58:06 2008 UTC (12 years, 11 months ago) by artak
File MIME type: text/plain
File size: 14795 byte(s)
Firs version of symmetric Gauss-Seidel preconditioner with coloring
1 artak 1819
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22     /* Author: artak@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Solver.h"
28     #include "PasoUtil.h"
29    
30     #include <stdio.h>
31    
32     /**************************************************************/
33    
34     /* free all memory used by GS */
35    
36     void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37     if (in!=NULL) {
38     MEMFREE(in->colorOf);
39     Paso_SparseMatrix_free(in->factors);
40     MEMFREE(in->diag);
41     MEMFREE(in->main_iptr);
42     Paso_Pattern_free(in->pattern);
43     MEMFREE(in);
44     }
45     }
46    
47     /**************************************************************/
48    
49     /* constructs the incomplete block factorization of
50    
51     */
52     Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53     dim_t n=A->numRows;
54     dim_t n_block=A->row_block_size;
55     dim_t block_size=A->block_size;
56     index_t num_colors=0, *mis_marker=NULL;
57     register index_t i,iptr_main,iPtr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
58     double time0,time_color,time_fac;
59     /* allocations: */
60     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
61     if (Paso_checkPtr(out)) return NULL;
62     out->colorOf=MEMALLOC(n,index_t);
63     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
64     /*out->diag=MEMALLOC(A->len,double);*/
65     out->main_iptr=MEMALLOC(n,index_t);
66     out->pattern=Paso_Pattern_getReference(A->pattern);
67     out->factors=Paso_SparseMatrix_getReference(A);
68     out->n_block=n_block;
69     out->n=n;
70    
71     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72     time0=Paso_timer();
73     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
74     time_color=Paso_timer()-time0;
75    
76     if (Paso_noError()) {
77     time0=Paso_timer();
78    
79     if (! (Paso_checkPtr(out->diag))) {
80     if (n_block==1) {
81     #pragma omp parallel for private(i, iPtr) schedule(static)
82     for (i = 0; i < A->pattern->numOutput; i++) {
83     out->diag[i]=1.;
84     /* find main diagonal */
85     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86     if (A->pattern->index[iPtr]==i) {
87     iptr_main=iPtr;
88     out->diag[i]=A->val[iPtr];
89     break;
90     }
91     }
92     out->main_iptr[i]=iptr_main;
93     }
94     } else if (n_block==2) {
95     #pragma omp parallel for private(i, iPtr) schedule(static)
96     for (i = 0; i < A->pattern->numOutput; i++) {
97     out->diag[i*4+0]= 1.;
98     out->diag[i*4+1]= 0.;
99     out->diag[i*4+2]= 0.;
100     out->diag[i*4+3]= 1.;
101     /* find main diagonal */
102     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
103     if (A->pattern->index[iPtr]==i) {
104     iptr_main=iPtr;
105     out->diag[i*4]= A->val[iPtr*4];
106     out->diag[i*4+1]=A->val[iPtr*4+1];
107     out->diag[i*4+2]=A->val[iPtr*4+2];
108     out->diag[i*4+3]= A->val[iPtr*4+3];
109     break;
110     }
111     }
112     out->main_iptr[i]=iptr_main;
113     }
114     } else if (n_block==3) {
115     #pragma omp parallel for private(i, iPtr) schedule(static)
116     for (i = 0; i < A->pattern->numOutput; i++) {
117     out->diag[i*9 ]=1.;
118     out->diag[i*9+1]=0.;
119     out->diag[i*9+2]=0.;
120     out->diag[i*9+3]=0.;
121     out->diag[i*9+4]=1.;
122     out->diag[i*9+5]=0.;
123     out->diag[i*9+6]=0.;
124     out->diag[i*9+7]=0.;
125     out->diag[i*9+8]=1.;
126     /* find main diagonal */
127     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
128     if (A->pattern->index[iPtr]==i) {
129     iptr_main=iPtr;
130     out->diag[i*9 ]=A->val[iPtr*9 ];
131     out->diag[i*9+1]=A->val[iPtr*9+1];
132     out->diag[i*9+2]=A->val[iPtr*9+2];
133     out->diag[i*9+3]=A->val[iPtr*9+3];
134     out->diag[i*9+4]=A->val[iPtr*9+4];
135     out->diag[i*9+5]=A->val[iPtr*9+5];
136     out->diag[i*9+6]=A->val[iPtr*9+6];
137     out->diag[i*9+7]=A->val[iPtr*9+7];
138     out->diag[i*9+8]=A->val[iPtr*9+8];
139     break;
140     }
141     }
142     out->main_iptr[i]=iptr_main;
143     }
144     }
145     }
146    
147     time_fac=Paso_timer()-time0;
148     }
149     }
150     if (Paso_noError()) {
151     if (verbose) {
152     printf("GS: %d color used \n",out->num_colors);
153     printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
154     }
155     return out;
156     } else {
157     Paso_Solver_GS_free(out);
158     return NULL;
159     }
160     }
161    
162     /**************************************************************/
163    
164     /* apply GS precondition b-> x
165    
166     in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
167    
168     should be called within a parallel region
169     barrier synconization should be performed to make sure that the input vector available
170    
171     */
172    
173     void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
174     register dim_t i,k;
175     register index_t color,iptr_ik,iptr_main;
176     register double S1,S2,S3,R1,R2,R3;
177     dim_t n_block=gs->n_block;
178     dim_t n=gs->n;
179     index_t* pivot=NULL;
180    
181     /* copy x into b*/
182     #pragma omp parallel for private(i) schedule(static)
183     for (i=0;i<n*n_block;++i) x[i]=b[i];
184     /* forward substitution */
185     for (color=0;color<gs->num_colors;++color) {
186     if (n_block==1) {
187     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
188     for (i = 0; i < n; ++i) {
189     if (gs->colorOf[i]==color) {
190     /* x_i=x_i-a_ik*x_k */
191     S1=x[i];
192     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
193     k=gs->pattern->index[iptr_ik];
194     if (gs->colorOf[k]<color) {
195     R1=x[k];
196     S1-=gs->factors->val[iptr_ik]*R1;
197     }
198     }
199     iptr_main=gs->main_iptr[i];
200     x[i]=(1/gs->factors->val[iptr_main])*S1;
201     }
202     }
203     } else if (n_block==2) {
204     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
205     for (i = 0; i < n; ++i) {
206     if (gs->colorOf[i]==color) {
207     /* x_i=x_i-a_ik*x_k */
208     S1=x[2*i];
209     S2=x[2*i+1];
210     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
211     k=gs->pattern->index[iptr_ik];
212     if (gs->colorOf[k]<color) {
213     R1=x[2*k];
214     R2=x[2*k+1];
215     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
216     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
217     }
218     }
219     iptr_main=gs->main_iptr[i];
220     x[2*i ]=(1/gs->factors->val[4*iptr_main ])*S1+(1/gs->factors->val[4*iptr_main+2])*S2;
221     x[2*i+1]=(1/gs->factors->val[4*iptr_main+1])*S1+(1/gs->factors->val[4*iptr_main+3])*S2;
222     }
223    
224     }
225     } else if (n_block==3) {
226     #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
227     for (i = 0; i < n; ++i) {
228     if (gs->colorOf[i]==color) {
229     /* x_i=x_i-a_ik*x_k */
230     S1=x[3*i];
231     S2=x[3*i+1];
232     S3=x[3*i+2];
233     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
234     k=gs->pattern->index[iptr_ik];
235     if (gs->colorOf[k]<color) {
236     R1=x[3*k];
237     R2=x[3*k+1];
238     R3=x[3*k+2];
239     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
240     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
241     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
242     }
243     }
244     iptr_main=gs->main_iptr[i];
245     x[3*i ]=(1/gs->factors->val[9*iptr_main ])*S1+(1/gs->factors->val[9*iptr_main+3])*S2+(1/gs->factors->val[9*iptr_main+6])*S3;
246     x[3*i+1]=(1/gs->factors->val[9*iptr_main+1])*S1+(1/gs->factors->val[9*iptr_main+4])*S2+(1/gs->factors->val[9*iptr_main+7])*S3;
247     x[3*i+2]=(1/gs->factors->val[9*iptr_main+2])*S1+(1/gs->factors->val[9*iptr_main+5])*S2+(1/gs->factors->val[9*iptr_main+8])*S3;
248     }
249     }
250     }
251     }
252    
253     /* Multipling with diag(A) */
254     Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
255    
256     /* backward substitution */
257     for (color=(gs->num_colors)-1;color>-1;--color) {
258     if (n_block==1) {
259     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
260     for (i = 0; i < n; ++i) {
261     if (gs->colorOf[i]==color) {
262     /* x_i=x_i-a_ik*x_k */
263     S1=x[i];
264     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
265     k=gs->pattern->index[iptr_ik];
266     if (gs->colorOf[k]>color) {
267     R1=x[k];
268     S1-=gs->factors->val[iptr_ik]*R1;
269     }
270     }
271     /*x[i]=S1;*/
272     iptr_main=gs->main_iptr[i];
273     x[i]=(1/gs->factors->val[iptr_main])*S1;
274     }
275     }
276     } else if (n_block==2) {
277     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
278     for (i = 0; i < n; ++i) {
279     if (gs->colorOf[i]==color) {
280     /* x_i=x_i-a_ik*x_k */
281     S1=x[2*i];
282     S2=x[2*i+1];
283     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
284     k=gs->pattern->index[iptr_ik];
285     if (gs->colorOf[k]>color) {
286     R1=x[2*k];
287     R2=x[2*k+1];
288     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
289     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
290     }
291     }
292     /*x[2*i]=S1;
293     x[2*i+1]=S2;*/
294     iptr_main=gs->main_iptr[i];
295     x[2*i ]=(1/gs->factors->val[4*iptr_main ])*S1+(1/gs->factors->val[4*iptr_main+2])*S2;
296     x[2*i+1]=(1/gs->factors->val[4*iptr_main+1])*S1+(1/gs->factors->val[4*iptr_main+3])*S2;
297     }
298     }
299     } else if (n_block==3) {
300     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
301     for (i = 0; i < n; ++i) {
302     if (gs->colorOf[i]==color) {
303     /* x_i=x_i-a_ik*x_k */
304     S1=x[3*i ];
305     S2=x[3*i+1];
306     S3=x[3*i+2];
307     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
308     k=gs->pattern->index[iptr_ik];
309     if (gs->colorOf[k]>color) {
310     R1=x[3*k];
311     R2=x[3*k+1];
312     R3=x[3*k+2];
313     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
314     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
315     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
316     }
317     }
318     /* x[3*i]=S1;
319     x[3*i+1]=S2;
320     x[3*i+2]=S3;
321     */ iptr_main=gs->main_iptr[i];
322     x[3*i ]=(1/gs->factors->val[9*iptr_main ])*S1+(1/gs->factors->val[9*iptr_main+3])*S2+(1/gs->factors->val[9*iptr_main+6])*S3;
323     x[3*i+1]=(1/gs->factors->val[9*iptr_main+1])*S1+(1/gs->factors->val[9*iptr_main+4])*S2+(1/gs->factors->val[9*iptr_main+7])*S3;
324     x[3*i+2]=(1/gs->factors->val[9*iptr_main+2])*S1+(1/gs->factors->val[9*iptr_main+5])*S2+(1/gs->factors->val[9*iptr_main+8])*S3;
325    
326     }
327     }
328     }
329     }
330     return;
331     }
332    

  ViewVC Help
Powered by ViewVC 1.1.26