/[escript]/trunk/paso/src/Solver_GS.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_GS.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1819 - (show annotations)
Tue Sep 30 05:58:06 2008 UTC (11 years, 6 months ago) by artak
File MIME type: text/plain
File size: 14795 byte(s)
Firs version of symmetric Gauss-Seidel preconditioner with coloring
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22 /* Author: artak@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 #include <stdio.h>
31
32 /**************************************************************/
33
34 /* free all memory used by GS */
35
36 void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37 if (in!=NULL) {
38 MEMFREE(in->colorOf);
39 Paso_SparseMatrix_free(in->factors);
40 MEMFREE(in->diag);
41 MEMFREE(in->main_iptr);
42 Paso_Pattern_free(in->pattern);
43 MEMFREE(in);
44 }
45 }
46
47 /**************************************************************/
48
49 /* constructs the incomplete block factorization of
50
51 */
52 Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53 dim_t n=A->numRows;
54 dim_t n_block=A->row_block_size;
55 dim_t block_size=A->block_size;
56 index_t num_colors=0, *mis_marker=NULL;
57 register index_t i,iptr_main,iPtr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
58 double time0,time_color,time_fac;
59 /* allocations: */
60 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
61 if (Paso_checkPtr(out)) return NULL;
62 out->colorOf=MEMALLOC(n,index_t);
63 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
64 /*out->diag=MEMALLOC(A->len,double);*/
65 out->main_iptr=MEMALLOC(n,index_t);
66 out->pattern=Paso_Pattern_getReference(A->pattern);
67 out->factors=Paso_SparseMatrix_getReference(A);
68 out->n_block=n_block;
69 out->n=n;
70
71 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72 time0=Paso_timer();
73 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
74 time_color=Paso_timer()-time0;
75
76 if (Paso_noError()) {
77 time0=Paso_timer();
78
79 if (! (Paso_checkPtr(out->diag))) {
80 if (n_block==1) {
81 #pragma omp parallel for private(i, iPtr) schedule(static)
82 for (i = 0; i < A->pattern->numOutput; i++) {
83 out->diag[i]=1.;
84 /* find main diagonal */
85 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86 if (A->pattern->index[iPtr]==i) {
87 iptr_main=iPtr;
88 out->diag[i]=A->val[iPtr];
89 break;
90 }
91 }
92 out->main_iptr[i]=iptr_main;
93 }
94 } else if (n_block==2) {
95 #pragma omp parallel for private(i, iPtr) schedule(static)
96 for (i = 0; i < A->pattern->numOutput; i++) {
97 out->diag[i*4+0]= 1.;
98 out->diag[i*4+1]= 0.;
99 out->diag[i*4+2]= 0.;
100 out->diag[i*4+3]= 1.;
101 /* find main diagonal */
102 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
103 if (A->pattern->index[iPtr]==i) {
104 iptr_main=iPtr;
105 out->diag[i*4]= A->val[iPtr*4];
106 out->diag[i*4+1]=A->val[iPtr*4+1];
107 out->diag[i*4+2]=A->val[iPtr*4+2];
108 out->diag[i*4+3]= A->val[iPtr*4+3];
109 break;
110 }
111 }
112 out->main_iptr[i]=iptr_main;
113 }
114 } else if (n_block==3) {
115 #pragma omp parallel for private(i, iPtr) schedule(static)
116 for (i = 0; i < A->pattern->numOutput; i++) {
117 out->diag[i*9 ]=1.;
118 out->diag[i*9+1]=0.;
119 out->diag[i*9+2]=0.;
120 out->diag[i*9+3]=0.;
121 out->diag[i*9+4]=1.;
122 out->diag[i*9+5]=0.;
123 out->diag[i*9+6]=0.;
124 out->diag[i*9+7]=0.;
125 out->diag[i*9+8]=1.;
126 /* find main diagonal */
127 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
128 if (A->pattern->index[iPtr]==i) {
129 iptr_main=iPtr;
130 out->diag[i*9 ]=A->val[iPtr*9 ];
131 out->diag[i*9+1]=A->val[iPtr*9+1];
132 out->diag[i*9+2]=A->val[iPtr*9+2];
133 out->diag[i*9+3]=A->val[iPtr*9+3];
134 out->diag[i*9+4]=A->val[iPtr*9+4];
135 out->diag[i*9+5]=A->val[iPtr*9+5];
136 out->diag[i*9+6]=A->val[iPtr*9+6];
137 out->diag[i*9+7]=A->val[iPtr*9+7];
138 out->diag[i*9+8]=A->val[iPtr*9+8];
139 break;
140 }
141 }
142 out->main_iptr[i]=iptr_main;
143 }
144 }
145 }
146
147 time_fac=Paso_timer()-time0;
148 }
149 }
150 if (Paso_noError()) {
151 if (verbose) {
152 printf("GS: %d color used \n",out->num_colors);
153 printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
154 }
155 return out;
156 } else {
157 Paso_Solver_GS_free(out);
158 return NULL;
159 }
160 }
161
162 /**************************************************************/
163
164 /* apply GS precondition b-> x
165
166 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
167
168 should be called within a parallel region
169 barrier synconization should be performed to make sure that the input vector available
170
171 */
172
173 void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
174 register dim_t i,k;
175 register index_t color,iptr_ik,iptr_main;
176 register double S1,S2,S3,R1,R2,R3;
177 dim_t n_block=gs->n_block;
178 dim_t n=gs->n;
179 index_t* pivot=NULL;
180
181 /* copy x into b*/
182 #pragma omp parallel for private(i) schedule(static)
183 for (i=0;i<n*n_block;++i) x[i]=b[i];
184 /* forward substitution */
185 for (color=0;color<gs->num_colors;++color) {
186 if (n_block==1) {
187 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
188 for (i = 0; i < n; ++i) {
189 if (gs->colorOf[i]==color) {
190 /* x_i=x_i-a_ik*x_k */
191 S1=x[i];
192 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
193 k=gs->pattern->index[iptr_ik];
194 if (gs->colorOf[k]<color) {
195 R1=x[k];
196 S1-=gs->factors->val[iptr_ik]*R1;
197 }
198 }
199 iptr_main=gs->main_iptr[i];
200 x[i]=(1/gs->factors->val[iptr_main])*S1;
201 }
202 }
203 } else if (n_block==2) {
204 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
205 for (i = 0; i < n; ++i) {
206 if (gs->colorOf[i]==color) {
207 /* x_i=x_i-a_ik*x_k */
208 S1=x[2*i];
209 S2=x[2*i+1];
210 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
211 k=gs->pattern->index[iptr_ik];
212 if (gs->colorOf[k]<color) {
213 R1=x[2*k];
214 R2=x[2*k+1];
215 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
216 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
217 }
218 }
219 iptr_main=gs->main_iptr[i];
220 x[2*i ]=(1/gs->factors->val[4*iptr_main ])*S1+(1/gs->factors->val[4*iptr_main+2])*S2;
221 x[2*i+1]=(1/gs->factors->val[4*iptr_main+1])*S1+(1/gs->factors->val[4*iptr_main+3])*S2;
222 }
223
224 }
225 } else if (n_block==3) {
226 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
227 for (i = 0; i < n; ++i) {
228 if (gs->colorOf[i]==color) {
229 /* x_i=x_i-a_ik*x_k */
230 S1=x[3*i];
231 S2=x[3*i+1];
232 S3=x[3*i+2];
233 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
234 k=gs->pattern->index[iptr_ik];
235 if (gs->colorOf[k]<color) {
236 R1=x[3*k];
237 R2=x[3*k+1];
238 R3=x[3*k+2];
239 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
240 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
241 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
242 }
243 }
244 iptr_main=gs->main_iptr[i];
245 x[3*i ]=(1/gs->factors->val[9*iptr_main ])*S1+(1/gs->factors->val[9*iptr_main+3])*S2+(1/gs->factors->val[9*iptr_main+6])*S3;
246 x[3*i+1]=(1/gs->factors->val[9*iptr_main+1])*S1+(1/gs->factors->val[9*iptr_main+4])*S2+(1/gs->factors->val[9*iptr_main+7])*S3;
247 x[3*i+2]=(1/gs->factors->val[9*iptr_main+2])*S1+(1/gs->factors->val[9*iptr_main+5])*S2+(1/gs->factors->val[9*iptr_main+8])*S3;
248 }
249 }
250 }
251 }
252
253 /* Multipling with diag(A) */
254 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
255
256 /* backward substitution */
257 for (color=(gs->num_colors)-1;color>-1;--color) {
258 if (n_block==1) {
259 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
260 for (i = 0; i < n; ++i) {
261 if (gs->colorOf[i]==color) {
262 /* x_i=x_i-a_ik*x_k */
263 S1=x[i];
264 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
265 k=gs->pattern->index[iptr_ik];
266 if (gs->colorOf[k]>color) {
267 R1=x[k];
268 S1-=gs->factors->val[iptr_ik]*R1;
269 }
270 }
271 /*x[i]=S1;*/
272 iptr_main=gs->main_iptr[i];
273 x[i]=(1/gs->factors->val[iptr_main])*S1;
274 }
275 }
276 } else if (n_block==2) {
277 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
278 for (i = 0; i < n; ++i) {
279 if (gs->colorOf[i]==color) {
280 /* x_i=x_i-a_ik*x_k */
281 S1=x[2*i];
282 S2=x[2*i+1];
283 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
284 k=gs->pattern->index[iptr_ik];
285 if (gs->colorOf[k]>color) {
286 R1=x[2*k];
287 R2=x[2*k+1];
288 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
289 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
290 }
291 }
292 /*x[2*i]=S1;
293 x[2*i+1]=S2;*/
294 iptr_main=gs->main_iptr[i];
295 x[2*i ]=(1/gs->factors->val[4*iptr_main ])*S1+(1/gs->factors->val[4*iptr_main+2])*S2;
296 x[2*i+1]=(1/gs->factors->val[4*iptr_main+1])*S1+(1/gs->factors->val[4*iptr_main+3])*S2;
297 }
298 }
299 } else if (n_block==3) {
300 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
301 for (i = 0; i < n; ++i) {
302 if (gs->colorOf[i]==color) {
303 /* x_i=x_i-a_ik*x_k */
304 S1=x[3*i ];
305 S2=x[3*i+1];
306 S3=x[3*i+2];
307 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
308 k=gs->pattern->index[iptr_ik];
309 if (gs->colorOf[k]>color) {
310 R1=x[3*k];
311 R2=x[3*k+1];
312 R3=x[3*k+2];
313 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
314 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
315 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
316 }
317 }
318 /* x[3*i]=S1;
319 x[3*i+1]=S2;
320 x[3*i+2]=S3;
321 */ iptr_main=gs->main_iptr[i];
322 x[3*i ]=(1/gs->factors->val[9*iptr_main ])*S1+(1/gs->factors->val[9*iptr_main+3])*S2+(1/gs->factors->val[9*iptr_main+6])*S3;
323 x[3*i+1]=(1/gs->factors->val[9*iptr_main+1])*S1+(1/gs->factors->val[9*iptr_main+4])*S2+(1/gs->factors->val[9*iptr_main+7])*S3;
324 x[3*i+2]=(1/gs->factors->val[9*iptr_main+2])*S1+(1/gs->factors->val[9*iptr_main+5])*S2+(1/gs->factors->val[9*iptr_main+8])*S3;
325
326 }
327 }
328 }
329 }
330 return;
331 }
332

  ViewVC Help
Powered by ViewVC 1.1.26