/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Annotation of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1975 - (hide annotations)
Thu Nov 6 03:07:02 2008 UTC (10 years, 4 months ago) by artak
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 18032 byte(s)
Some variables are initialized to get rid of compiler warnings
1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22     /* Author: artak@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Solver.h"
28     #include "PasoUtil.h"
29    
30     #include <stdio.h>
31    
32     /**************************************************************/
33    
34     /* free all memory used by GS */
35    
36     void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37     if (in!=NULL) {
38     MEMFREE(in->colorOf);
39     Paso_SparseMatrix_free(in->factors);
40     MEMFREE(in->diag);
41     MEMFREE(in->main_iptr);
42     Paso_Pattern_free(in->pattern);
43     MEMFREE(in);
44     }
45     }
46    
47     /**************************************************************/
48    
49     /* constructs the incomplete block factorization of
50    
51     */
52     Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53     dim_t n=A->numRows;
54     dim_t n_block=A->row_block_size;
55     dim_t block_size=A->block_size;
56 artak 1975 register index_t i,iptr_main=0,iPtr;
57     double time0=0,time_color=0,time_fac=0;
58 jfenwick 1851 /* allocations: */
59     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
60     if (Paso_checkPtr(out)) return NULL;
61     out->colorOf=MEMALLOC(n,index_t);
62     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
63     /*out->diag=MEMALLOC(A->len,double);*/
64     out->main_iptr=MEMALLOC(n,index_t);
65     out->pattern=Paso_Pattern_getReference(A->pattern);
66     out->factors=Paso_SparseMatrix_getReference(A);
67     out->n_block=n_block;
68     out->n=n;
69    
70     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71     time0=Paso_timer();
72     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
73     time_color=Paso_timer()-time0;
74    
75     if (Paso_noError()) {
76     time0=Paso_timer();
77    
78     if (! (Paso_checkPtr(out->diag))) {
79     if (n_block==1) {
80     #pragma omp parallel for private(i, iPtr) schedule(static)
81     for (i = 0; i < A->pattern->numOutput; i++) {
82     out->diag[i]=1.;
83     /* find main diagonal */
84     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
85     if (A->pattern->index[iPtr]==i) {
86     iptr_main=iPtr;
87     out->diag[i]=A->val[iPtr];
88     break;
89     }
90     }
91     out->main_iptr[i]=iptr_main;
92     }
93     } else if (n_block==2) {
94     #pragma omp parallel for private(i, iPtr) schedule(static)
95     for (i = 0; i < A->pattern->numOutput; i++) {
96     out->diag[i*4+0]= 1.;
97     out->diag[i*4+1]= 0.;
98     out->diag[i*4+2]= 0.;
99     out->diag[i*4+3]= 1.;
100     /* find main diagonal */
101     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
102     if (A->pattern->index[iPtr]==i) {
103     iptr_main=iPtr;
104     out->diag[i*4]= A->val[iPtr*4];
105     out->diag[i*4+1]=A->val[iPtr*4+1];
106     out->diag[i*4+2]=A->val[iPtr*4+2];
107     out->diag[i*4+3]= A->val[iPtr*4+3];
108     break;
109     }
110     }
111     out->main_iptr[i]=iptr_main;
112     }
113     } else if (n_block==3) {
114     #pragma omp parallel for private(i, iPtr) schedule(static)
115     for (i = 0; i < A->pattern->numOutput; i++) {
116     out->diag[i*9 ]=1.;
117     out->diag[i*9+1]=0.;
118     out->diag[i*9+2]=0.;
119     out->diag[i*9+3]=0.;
120     out->diag[i*9+4]=1.;
121     out->diag[i*9+5]=0.;
122     out->diag[i*9+6]=0.;
123     out->diag[i*9+7]=0.;
124     out->diag[i*9+8]=1.;
125     /* find main diagonal */
126     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
127     if (A->pattern->index[iPtr]==i) {
128     iptr_main=iPtr;
129     out->diag[i*9 ]=A->val[iPtr*9 ];
130     out->diag[i*9+1]=A->val[iPtr*9+1];
131     out->diag[i*9+2]=A->val[iPtr*9+2];
132     out->diag[i*9+3]=A->val[iPtr*9+3];
133     out->diag[i*9+4]=A->val[iPtr*9+4];
134     out->diag[i*9+5]=A->val[iPtr*9+5];
135     out->diag[i*9+6]=A->val[iPtr*9+6];
136     out->diag[i*9+7]=A->val[iPtr*9+7];
137     out->diag[i*9+8]=A->val[iPtr*9+8];
138     break;
139     }
140     }
141     out->main_iptr[i]=iptr_main;
142     }
143     }
144     }
145    
146     time_fac=Paso_timer()-time0;
147     }
148     }
149     if (Paso_noError()) {
150     if (verbose) {
151     printf("GS: %d color used \n",out->num_colors);
152     printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
153     }
154     return out;
155     } else {
156     Paso_Solver_GS_free(out);
157     return NULL;
158     }
159     }
160    
161     /**************************************************************/
162    
163     /* apply GS precondition b-> x
164    
165     in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
166    
167     should be called within a parallel region
168     barrier synconization should be performed to make sure that the input vector available
169    
170     */
171    
172     void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
173     register dim_t i,k;
174     register index_t color,iptr_ik,iptr_main;
175     register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
176     dim_t n_block=gs->n_block;
177     dim_t n=gs->n;
178     index_t* pivot=NULL;
179    
180     /* copy x into b*/
181     #pragma omp parallel for private(i) schedule(static)
182     for (i=0;i<n*n_block;++i) x[i]=b[i];
183     /* forward substitution */
184     for (color=0;color<gs->num_colors;++color) {
185     if (n_block==1) {
186     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
187     for (i = 0; i < n; ++i) {
188     if (gs->colorOf[i]==color) {
189     /* x_i=x_i-a_ik*x_k */
190     S1=x[i];
191     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
192     k=gs->pattern->index[iptr_ik];
193     if (gs->colorOf[k]<color) {
194     R1=x[k];
195     S1-=gs->factors->val[iptr_ik]*R1;
196     }
197     }
198     iptr_main=gs->main_iptr[i];
199     x[i]=(1/gs->factors->val[iptr_main])*S1;
200     }
201     }
202     } else if (n_block==2) {
203     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
204     for (i = 0; i < n; ++i) {
205     if (gs->colorOf[i]==color) {
206     /* x_i=x_i-a_ik*x_k */
207     S1=x[2*i];
208     S2=x[2*i+1];
209     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
210     k=gs->pattern->index[iptr_ik];
211     if (gs->colorOf[k]<color) {
212     R1=x[2*k];
213     R2=x[2*k+1];
214     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
215     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
216     }
217     }
218     iptr_main=gs->main_iptr[i];
219     A11=gs->factors->val[iptr_main*4];
220     A21=gs->factors->val[iptr_main*4+1];
221     A12=gs->factors->val[iptr_main*4+2];
222     A22=gs->factors->val[iptr_main*4+3];
223     D = A11*A22-A12*A21;
224     if (ABS(D)>0.) {
225     D=1./D;
226     S11= A22*D;
227     S21=-A21*D;
228     S12=-A12*D;
229     S22= A11*D;
230     x[2*i ]=S11*S1+S12*S2;
231     x[2*i+1]=S21*S1+S22*S2;
232     } else {
233     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
234     }
235     }
236    
237     }
238     } else if (n_block==3) {
239     #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
240     for (i = 0; i < n; ++i) {
241     if (gs->colorOf[i]==color) {
242     /* x_i=x_i-a_ik*x_k */
243     S1=x[3*i];
244     S2=x[3*i+1];
245     S3=x[3*i+2];
246     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
247     k=gs->pattern->index[iptr_ik];
248     if (gs->colorOf[k]<color) {
249     R1=x[3*k];
250     R2=x[3*k+1];
251     R3=x[3*k+2];
252     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
253     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
254     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
255     }
256     }
257     iptr_main=gs->main_iptr[i];
258     A11=gs->factors->val[iptr_main*9 ];
259     A21=gs->factors->val[iptr_main*9+1];
260     A31=gs->factors->val[iptr_main*9+2];
261     A12=gs->factors->val[iptr_main*9+3];
262     A22=gs->factors->val[iptr_main*9+4];
263     A32=gs->factors->val[iptr_main*9+5];
264     A13=gs->factors->val[iptr_main*9+6];
265     A23=gs->factors->val[iptr_main*9+7];
266     A33=gs->factors->val[iptr_main*9+8];
267     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
268     if (ABS(D)>0.) {
269     D=1./D;
270     S11=(A22*A33-A23*A32)*D;
271     S21=(A31*A23-A21*A33)*D;
272     S31=(A21*A32-A31*A22)*D;
273     S12=(A13*A32-A12*A33)*D;
274     S22=(A11*A33-A31*A13)*D;
275     S32=(A12*A31-A11*A32)*D;
276     S13=(A12*A23-A13*A22)*D;
277     S23=(A13*A21-A11*A23)*D;
278     S33=(A11*A22-A12*A21)*D;
279     /* a_ik=a_ii^{-1}*a_ik */
280     x[3*i ]=S11*S1+S12*S2+S13*S3;
281     x[3*i+1]=S21*S1+S22*S2+S23*S3;
282     x[3*i+2]=S31*S1+S32*S2+S33*S3;
283     } else {
284     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
285     }
286     }
287     }
288     }
289     }
290    
291     /* Multipling with diag(A) */
292     Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
293    
294     /* backward substitution */
295     for (color=(gs->num_colors)-1;color>-1;--color) {
296     if (n_block==1) {
297     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
298     for (i = 0; i < n; ++i) {
299     if (gs->colorOf[i]==color) {
300     /* x_i=x_i-a_ik*x_k */
301     S1=x[i];
302     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
303     k=gs->pattern->index[iptr_ik];
304     if (gs->colorOf[k]>color) {
305     R1=x[k];
306     S1-=gs->factors->val[iptr_ik]*R1;
307     }
308     }
309     /*x[i]=S1;*/
310     iptr_main=gs->main_iptr[i];
311     x[i]=(1/gs->factors->val[iptr_main])*S1;
312     }
313     }
314     } else if (n_block==2) {
315     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
316     for (i = 0; i < n; ++i) {
317     if (gs->colorOf[i]==color) {
318     /* x_i=x_i-a_ik*x_k */
319     S1=x[2*i];
320     S2=x[2*i+1];
321     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
322     k=gs->pattern->index[iptr_ik];
323     if (gs->colorOf[k]>color) {
324     R1=x[2*k];
325     R2=x[2*k+1];
326     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
327     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
328     }
329     }
330     /*x[2*i]=S1;
331     x[2*i+1]=S2;*/
332     iptr_main=gs->main_iptr[i];
333     A11=gs->factors->val[iptr_main*4];
334     A21=gs->factors->val[iptr_main*4+1];
335     A12=gs->factors->val[iptr_main*4+2];
336     A22=gs->factors->val[iptr_main*4+3];
337     D = A11*A22-A12*A21;
338     if (ABS(D)>0.) {
339     D=1./D;
340     S11= A22*D;
341     S21=-A21*D;
342     S12=-A12*D;
343     S22= A11*D;
344     x[2*i ]=S11*S1+S12*S2;
345     x[2*i+1]=S21*S1+S22*S2;
346     } else {
347     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
348     }
349    
350     }
351     }
352     } else if (n_block==3) {
353     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
354     for (i = 0; i < n; ++i) {
355     if (gs->colorOf[i]==color) {
356     /* x_i=x_i-a_ik*x_k */
357     S1=x[3*i ];
358     S2=x[3*i+1];
359     S3=x[3*i+2];
360     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
361     k=gs->pattern->index[iptr_ik];
362     if (gs->colorOf[k]>color) {
363     R1=x[3*k];
364     R2=x[3*k+1];
365     R3=x[3*k+2];
366     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
367     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
368     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
369     }
370     }
371     /* x[3*i]=S1;
372     x[3*i+1]=S2;
373     x[3*i+2]=S3;
374     */ iptr_main=gs->main_iptr[i];
375     A11=gs->factors->val[iptr_main*9 ];
376     A21=gs->factors->val[iptr_main*9+1];
377     A31=gs->factors->val[iptr_main*9+2];
378     A12=gs->factors->val[iptr_main*9+3];
379     A22=gs->factors->val[iptr_main*9+4];
380     A32=gs->factors->val[iptr_main*9+5];
381     A13=gs->factors->val[iptr_main*9+6];
382     A23=gs->factors->val[iptr_main*9+7];
383     A33=gs->factors->val[iptr_main*9+8];
384     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
385     if (ABS(D)>0.) {
386     D=1./D;
387     S11=(A22*A33-A23*A32)*D;
388     S21=(A31*A23-A21*A33)*D;
389     S31=(A21*A32-A31*A22)*D;
390     S12=(A13*A32-A12*A33)*D;
391     S22=(A11*A33-A31*A13)*D;
392     S32=(A12*A31-A11*A32)*D;
393     S13=(A12*A23-A13*A22)*D;
394     S23=(A13*A21-A11*A23)*D;
395     S33=(A11*A22-A12*A21)*D;
396     x[3*i ]=S11*S1+S12*S2+S13*S3;
397     x[3*i+1]=S21*S1+S22*S2+S23*S3;
398     x[3*i+2]=S31*S1+S32*S2+S33*S3;
399     } else {
400     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
401     }
402     }
403     }
404     }
405     }
406     return;
407     }
408    

  ViewVC Help
Powered by ViewVC 1.1.26