/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Annotation of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3005 - (hide annotations)
Thu Apr 22 05:59:31 2010 UTC (8 years, 11 months ago) by gross
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 18408 byte(s)
early call of setPreconditioner in FCT solver removed.
1 jfenwick 1851
2     /*******************************************************
3     *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 jfenwick 1851 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22     /* Author: artak@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Solver.h"
28     #include "PasoUtil.h"
29    
30     #include <stdio.h>
31    
32     /**************************************************************/
33    
34     /* free all memory used by GS */
35    
36     void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37     if (in!=NULL) {
38     MEMFREE(in->colorOf);
39     Paso_SparseMatrix_free(in->factors);
40     MEMFREE(in->diag);
41     MEMFREE(in->main_iptr);
42     Paso_Pattern_free(in->pattern);
43     MEMFREE(in);
44     }
45     }
46    
47     /**************************************************************/
48    
49     /* constructs the incomplete block factorization of
50    
51     */
52     Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53     dim_t n=A->numRows;
54     dim_t n_block=A->row_block_size;
55     dim_t block_size=A->block_size;
56 artak 1975 register index_t i,iptr_main=0,iPtr;
57     double time0=0,time_color=0,time_fac=0;
58 jfenwick 1851 /* allocations: */
59     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
60     if (Paso_checkPtr(out)) return NULL;
61     out->colorOf=MEMALLOC(n,index_t);
62     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
63     /*out->diag=MEMALLOC(A->len,double);*/
64     out->main_iptr=MEMALLOC(n,index_t);
65     out->pattern=Paso_Pattern_getReference(A->pattern);
66     out->factors=Paso_SparseMatrix_getReference(A);
67     out->n_block=n_block;
68     out->n=n;
69    
70     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71     time0=Paso_timer();
72     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
73     time_color=Paso_timer()-time0;
74    
75     if (Paso_noError()) {
76     time0=Paso_timer();
77    
78     if (! (Paso_checkPtr(out->diag))) {
79     if (n_block==1) {
80 artak 2833 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
81 jfenwick 1851 for (i = 0; i < A->pattern->numOutput; i++) {
82 artak 2833 iptr_main=0;
83 jfenwick 1851 out->diag[i]=1.;
84     /* find main diagonal */
85     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86     if (A->pattern->index[iPtr]==i) {
87     iptr_main=iPtr;
88     out->diag[i]=A->val[iPtr];
89     break;
90     }
91     }
92     out->main_iptr[i]=iptr_main;
93     }
94     } else if (n_block==2) {
95 artak 2833 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
96 jfenwick 1851 for (i = 0; i < A->pattern->numOutput; i++) {
97     out->diag[i*4+0]= 1.;
98     out->diag[i*4+1]= 0.;
99     out->diag[i*4+2]= 0.;
100     out->diag[i*4+3]= 1.;
101 artak 2833 iptr_main=0;
102 jfenwick 1851 /* find main diagonal */
103     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
104     if (A->pattern->index[iPtr]==i) {
105     iptr_main=iPtr;
106     out->diag[i*4]= A->val[iPtr*4];
107     out->diag[i*4+1]=A->val[iPtr*4+1];
108     out->diag[i*4+2]=A->val[iPtr*4+2];
109     out->diag[i*4+3]= A->val[iPtr*4+3];
110     break;
111     }
112     }
113     out->main_iptr[i]=iptr_main;
114     }
115     } else if (n_block==3) {
116 artak 2833 #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
117 jfenwick 1851 for (i = 0; i < A->pattern->numOutput; i++) {
118     out->diag[i*9 ]=1.;
119     out->diag[i*9+1]=0.;
120     out->diag[i*9+2]=0.;
121     out->diag[i*9+3]=0.;
122     out->diag[i*9+4]=1.;
123     out->diag[i*9+5]=0.;
124     out->diag[i*9+6]=0.;
125     out->diag[i*9+7]=0.;
126     out->diag[i*9+8]=1.;
127 artak 2833 iptr_main=0;
128 jfenwick 1851 /* find main diagonal */
129     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
130     if (A->pattern->index[iPtr]==i) {
131     iptr_main=iPtr;
132     out->diag[i*9 ]=A->val[iPtr*9 ];
133     out->diag[i*9+1]=A->val[iPtr*9+1];
134     out->diag[i*9+2]=A->val[iPtr*9+2];
135     out->diag[i*9+3]=A->val[iPtr*9+3];
136     out->diag[i*9+4]=A->val[iPtr*9+4];
137     out->diag[i*9+5]=A->val[iPtr*9+5];
138     out->diag[i*9+6]=A->val[iPtr*9+6];
139     out->diag[i*9+7]=A->val[iPtr*9+7];
140     out->diag[i*9+8]=A->val[iPtr*9+8];
141     break;
142     }
143     }
144     out->main_iptr[i]=iptr_main;
145     }
146     }
147     }
148    
149     time_fac=Paso_timer()-time0;
150     }
151     }
152     if (Paso_noError()) {
153     if (verbose) {
154     printf("GS: %d color used \n",out->num_colors);
155     printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
156     }
157     return out;
158     } else {
159     Paso_Solver_GS_free(out);
160     return NULL;
161     }
162     }
163    
164     /**************************************************************/
165    
166     /* apply GS precondition b-> x
167    
168     in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
169    
170     should be called within a parallel region
171     barrier synconization should be performed to make sure that the input vector available
172    
173     */
174    
175     void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
176     register dim_t i,k;
177     register index_t color,iptr_ik,iptr_main;
178     register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
179     dim_t n_block=gs->n_block;
180     dim_t n=gs->n;
181     index_t* pivot=NULL;
182    
183     /* copy x into b*/
184     #pragma omp parallel for private(i) schedule(static)
185     for (i=0;i<n*n_block;++i) x[i]=b[i];
186     /* forward substitution */
187     for (color=0;color<gs->num_colors;++color) {
188     if (n_block==1) {
189     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
190     for (i = 0; i < n; ++i) {
191     if (gs->colorOf[i]==color) {
192     /* x_i=x_i-a_ik*x_k */
193     S1=x[i];
194     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
195     k=gs->pattern->index[iptr_ik];
196     if (gs->colorOf[k]<color) {
197     R1=x[k];
198     S1-=gs->factors->val[iptr_ik]*R1;
199     }
200     }
201     iptr_main=gs->main_iptr[i];
202 gross 3005 x[i]=(1./gs->factors->val[iptr_main])*S1;
203 jfenwick 1851 }
204     }
205     } else if (n_block==2) {
206 artak 2998 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2,A11,A21,A12,A22,D,S11,S21,S12,S22)
207 jfenwick 1851 for (i = 0; i < n; ++i) {
208     if (gs->colorOf[i]==color) {
209     /* x_i=x_i-a_ik*x_k */
210     S1=x[2*i];
211     S2=x[2*i+1];
212     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
213     k=gs->pattern->index[iptr_ik];
214     if (gs->colorOf[k]<color) {
215     R1=x[2*k];
216     R2=x[2*k+1];
217     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
218     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
219     }
220     }
221     iptr_main=gs->main_iptr[i];
222     A11=gs->factors->val[iptr_main*4];
223     A21=gs->factors->val[iptr_main*4+1];
224     A12=gs->factors->val[iptr_main*4+2];
225     A22=gs->factors->val[iptr_main*4+3];
226     D = A11*A22-A12*A21;
227     if (ABS(D)>0.) {
228     D=1./D;
229     S11= A22*D;
230     S21=-A21*D;
231     S12=-A12*D;
232     S22= A11*D;
233     x[2*i ]=S11*S1+S12*S2;
234     x[2*i+1]=S21*S1+S22*S2;
235     } else {
236     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
237     }
238     }
239    
240     }
241     } else if (n_block==3) {
242 artak 2998 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3,A11,A21,A31,A12,A22,A32,A13,A23,A33,D,S11,S21,S31,S12,S22,S32,S13,S23,S33)
243 jfenwick 1851 for (i = 0; i < n; ++i) {
244     if (gs->colorOf[i]==color) {
245     /* x_i=x_i-a_ik*x_k */
246     S1=x[3*i];
247     S2=x[3*i+1];
248     S3=x[3*i+2];
249     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
250     k=gs->pattern->index[iptr_ik];
251     if (gs->colorOf[k]<color) {
252     R1=x[3*k];
253     R2=x[3*k+1];
254     R3=x[3*k+2];
255     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
256     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
257     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
258     }
259     }
260     iptr_main=gs->main_iptr[i];
261     A11=gs->factors->val[iptr_main*9 ];
262     A21=gs->factors->val[iptr_main*9+1];
263     A31=gs->factors->val[iptr_main*9+2];
264     A12=gs->factors->val[iptr_main*9+3];
265     A22=gs->factors->val[iptr_main*9+4];
266     A32=gs->factors->val[iptr_main*9+5];
267     A13=gs->factors->val[iptr_main*9+6];
268     A23=gs->factors->val[iptr_main*9+7];
269     A33=gs->factors->val[iptr_main*9+8];
270     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
271     if (ABS(D)>0.) {
272     D=1./D;
273     S11=(A22*A33-A23*A32)*D;
274     S21=(A31*A23-A21*A33)*D;
275     S31=(A21*A32-A31*A22)*D;
276     S12=(A13*A32-A12*A33)*D;
277     S22=(A11*A33-A31*A13)*D;
278     S32=(A12*A31-A11*A32)*D;
279     S13=(A12*A23-A13*A22)*D;
280     S23=(A13*A21-A11*A23)*D;
281     S33=(A11*A22-A12*A21)*D;
282     /* a_ik=a_ii^{-1}*a_ik */
283     x[3*i ]=S11*S1+S12*S2+S13*S3;
284     x[3*i+1]=S21*S1+S22*S2+S23*S3;
285     x[3*i+2]=S31*S1+S32*S2+S33*S3;
286     } else {
287     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
288     }
289     }
290     }
291     }
292     }
293    
294     /* Multipling with diag(A) */
295     Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
296    
297     /* backward substitution */
298     for (color=(gs->num_colors)-1;color>-1;--color) {
299     if (n_block==1) {
300 artak 2998 /*#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)*/
301 jfenwick 1851 for (i = 0; i < n; ++i) {
302     if (gs->colorOf[i]==color) {
303     /* x_i=x_i-a_ik*x_k */
304     S1=x[i];
305     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
306     k=gs->pattern->index[iptr_ik];
307     if (gs->colorOf[k]>color) {
308     R1=x[k];
309     S1-=gs->factors->val[iptr_ik]*R1;
310     }
311     }
312     /*x[i]=S1;*/
313     iptr_main=gs->main_iptr[i];
314     x[i]=(1/gs->factors->val[iptr_main])*S1;
315     }
316     }
317     } else if (n_block==2) {
318 artak 2998 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2,iptr_main,D,A11,A21,A12,A22,S11,S21,S12,S22)
319 jfenwick 1851 for (i = 0; i < n; ++i) {
320     if (gs->colorOf[i]==color) {
321     /* x_i=x_i-a_ik*x_k */
322     S1=x[2*i];
323     S2=x[2*i+1];
324     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
325     k=gs->pattern->index[iptr_ik];
326     if (gs->colorOf[k]>color) {
327     R1=x[2*k];
328     R2=x[2*k+1];
329     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
330     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
331     }
332     }
333     /*x[2*i]=S1;
334     x[2*i+1]=S2;*/
335     iptr_main=gs->main_iptr[i];
336     A11=gs->factors->val[iptr_main*4];
337     A21=gs->factors->val[iptr_main*4+1];
338     A12=gs->factors->val[iptr_main*4+2];
339     A22=gs->factors->val[iptr_main*4+3];
340     D = A11*A22-A12*A21;
341     if (ABS(D)>0.) {
342     D=1./D;
343     S11= A22*D;
344     S21=-A21*D;
345     S12=-A12*D;
346     S22= A11*D;
347     x[2*i ]=S11*S1+S12*S2;
348     x[2*i+1]=S21*S1+S22*S2;
349     } else {
350     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
351     }
352    
353     }
354     }
355     } else if (n_block==3) {
356 artak 2998 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3,iptr_main,D,A11,A21,A31,A12,A22,A32,A13,A23,A33,S11,S21,S31,S12,S22,S32,S13,S23,S33)
357 jfenwick 1851 for (i = 0; i < n; ++i) {
358     if (gs->colorOf[i]==color) {
359     /* x_i=x_i-a_ik*x_k */
360     S1=x[3*i ];
361     S2=x[3*i+1];
362     S3=x[3*i+2];
363     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
364     k=gs->pattern->index[iptr_ik];
365     if (gs->colorOf[k]>color) {
366     R1=x[3*k];
367     R2=x[3*k+1];
368     R3=x[3*k+2];
369     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
370     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
371     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
372     }
373     }
374     /* x[3*i]=S1;
375     x[3*i+1]=S2;
376     x[3*i+2]=S3;
377     */ iptr_main=gs->main_iptr[i];
378     A11=gs->factors->val[iptr_main*9 ];
379     A21=gs->factors->val[iptr_main*9+1];
380     A31=gs->factors->val[iptr_main*9+2];
381     A12=gs->factors->val[iptr_main*9+3];
382     A22=gs->factors->val[iptr_main*9+4];
383     A32=gs->factors->val[iptr_main*9+5];
384     A13=gs->factors->val[iptr_main*9+6];
385     A23=gs->factors->val[iptr_main*9+7];
386     A33=gs->factors->val[iptr_main*9+8];
387     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
388     if (ABS(D)>0.) {
389     D=1./D;
390     S11=(A22*A33-A23*A32)*D;
391     S21=(A31*A23-A21*A33)*D;
392     S31=(A21*A32-A31*A22)*D;
393     S12=(A13*A32-A12*A33)*D;
394     S22=(A11*A33-A31*A13)*D;
395     S32=(A12*A31-A11*A32)*D;
396     S13=(A12*A23-A13*A22)*D;
397     S23=(A13*A21-A11*A23)*D;
398     S33=(A11*A22-A12*A21)*D;
399     x[3*i ]=S11*S1+S12*S2+S13*S3;
400     x[3*i+1]=S21*S1+S22*S2+S23*S3;
401     x[3*i+2]=S31*S1+S32*S2+S33*S3;
402     } else {
403     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
404     }
405     }
406     }
407     }
408     }
409     return;
410     }
411    

  ViewVC Help
Powered by ViewVC 1.1.26