/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1975 - (show annotations)
Thu Nov 6 03:07:02 2008 UTC (10 years, 4 months ago) by artak
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 18032 byte(s)
Some variables are initialized to get rid of compiler warnings
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22 /* Author: artak@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 #include <stdio.h>
31
32 /**************************************************************/
33
34 /* free all memory used by GS */
35
36 void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37 if (in!=NULL) {
38 MEMFREE(in->colorOf);
39 Paso_SparseMatrix_free(in->factors);
40 MEMFREE(in->diag);
41 MEMFREE(in->main_iptr);
42 Paso_Pattern_free(in->pattern);
43 MEMFREE(in);
44 }
45 }
46
47 /**************************************************************/
48
49 /* constructs the incomplete block factorization of
50
51 */
52 Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53 dim_t n=A->numRows;
54 dim_t n_block=A->row_block_size;
55 dim_t block_size=A->block_size;
56 register index_t i,iptr_main=0,iPtr;
57 double time0=0,time_color=0,time_fac=0;
58 /* allocations: */
59 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
60 if (Paso_checkPtr(out)) return NULL;
61 out->colorOf=MEMALLOC(n,index_t);
62 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
63 /*out->diag=MEMALLOC(A->len,double);*/
64 out->main_iptr=MEMALLOC(n,index_t);
65 out->pattern=Paso_Pattern_getReference(A->pattern);
66 out->factors=Paso_SparseMatrix_getReference(A);
67 out->n_block=n_block;
68 out->n=n;
69
70 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71 time0=Paso_timer();
72 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
73 time_color=Paso_timer()-time0;
74
75 if (Paso_noError()) {
76 time0=Paso_timer();
77
78 if (! (Paso_checkPtr(out->diag))) {
79 if (n_block==1) {
80 #pragma omp parallel for private(i, iPtr) schedule(static)
81 for (i = 0; i < A->pattern->numOutput; i++) {
82 out->diag[i]=1.;
83 /* find main diagonal */
84 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
85 if (A->pattern->index[iPtr]==i) {
86 iptr_main=iPtr;
87 out->diag[i]=A->val[iPtr];
88 break;
89 }
90 }
91 out->main_iptr[i]=iptr_main;
92 }
93 } else if (n_block==2) {
94 #pragma omp parallel for private(i, iPtr) schedule(static)
95 for (i = 0; i < A->pattern->numOutput; i++) {
96 out->diag[i*4+0]= 1.;
97 out->diag[i*4+1]= 0.;
98 out->diag[i*4+2]= 0.;
99 out->diag[i*4+3]= 1.;
100 /* find main diagonal */
101 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
102 if (A->pattern->index[iPtr]==i) {
103 iptr_main=iPtr;
104 out->diag[i*4]= A->val[iPtr*4];
105 out->diag[i*4+1]=A->val[iPtr*4+1];
106 out->diag[i*4+2]=A->val[iPtr*4+2];
107 out->diag[i*4+3]= A->val[iPtr*4+3];
108 break;
109 }
110 }
111 out->main_iptr[i]=iptr_main;
112 }
113 } else if (n_block==3) {
114 #pragma omp parallel for private(i, iPtr) schedule(static)
115 for (i = 0; i < A->pattern->numOutput; i++) {
116 out->diag[i*9 ]=1.;
117 out->diag[i*9+1]=0.;
118 out->diag[i*9+2]=0.;
119 out->diag[i*9+3]=0.;
120 out->diag[i*9+4]=1.;
121 out->diag[i*9+5]=0.;
122 out->diag[i*9+6]=0.;
123 out->diag[i*9+7]=0.;
124 out->diag[i*9+8]=1.;
125 /* find main diagonal */
126 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
127 if (A->pattern->index[iPtr]==i) {
128 iptr_main=iPtr;
129 out->diag[i*9 ]=A->val[iPtr*9 ];
130 out->diag[i*9+1]=A->val[iPtr*9+1];
131 out->diag[i*9+2]=A->val[iPtr*9+2];
132 out->diag[i*9+3]=A->val[iPtr*9+3];
133 out->diag[i*9+4]=A->val[iPtr*9+4];
134 out->diag[i*9+5]=A->val[iPtr*9+5];
135 out->diag[i*9+6]=A->val[iPtr*9+6];
136 out->diag[i*9+7]=A->val[iPtr*9+7];
137 out->diag[i*9+8]=A->val[iPtr*9+8];
138 break;
139 }
140 }
141 out->main_iptr[i]=iptr_main;
142 }
143 }
144 }
145
146 time_fac=Paso_timer()-time0;
147 }
148 }
149 if (Paso_noError()) {
150 if (verbose) {
151 printf("GS: %d color used \n",out->num_colors);
152 printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
153 }
154 return out;
155 } else {
156 Paso_Solver_GS_free(out);
157 return NULL;
158 }
159 }
160
161 /**************************************************************/
162
163 /* apply GS precondition b-> x
164
165 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
166
167 should be called within a parallel region
168 barrier synconization should be performed to make sure that the input vector available
169
170 */
171
172 void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
173 register dim_t i,k;
174 register index_t color,iptr_ik,iptr_main;
175 register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
176 dim_t n_block=gs->n_block;
177 dim_t n=gs->n;
178 index_t* pivot=NULL;
179
180 /* copy x into b*/
181 #pragma omp parallel for private(i) schedule(static)
182 for (i=0;i<n*n_block;++i) x[i]=b[i];
183 /* forward substitution */
184 for (color=0;color<gs->num_colors;++color) {
185 if (n_block==1) {
186 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
187 for (i = 0; i < n; ++i) {
188 if (gs->colorOf[i]==color) {
189 /* x_i=x_i-a_ik*x_k */
190 S1=x[i];
191 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
192 k=gs->pattern->index[iptr_ik];
193 if (gs->colorOf[k]<color) {
194 R1=x[k];
195 S1-=gs->factors->val[iptr_ik]*R1;
196 }
197 }
198 iptr_main=gs->main_iptr[i];
199 x[i]=(1/gs->factors->val[iptr_main])*S1;
200 }
201 }
202 } else if (n_block==2) {
203 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
204 for (i = 0; i < n; ++i) {
205 if (gs->colorOf[i]==color) {
206 /* x_i=x_i-a_ik*x_k */
207 S1=x[2*i];
208 S2=x[2*i+1];
209 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
210 k=gs->pattern->index[iptr_ik];
211 if (gs->colorOf[k]<color) {
212 R1=x[2*k];
213 R2=x[2*k+1];
214 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
215 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
216 }
217 }
218 iptr_main=gs->main_iptr[i];
219 A11=gs->factors->val[iptr_main*4];
220 A21=gs->factors->val[iptr_main*4+1];
221 A12=gs->factors->val[iptr_main*4+2];
222 A22=gs->factors->val[iptr_main*4+3];
223 D = A11*A22-A12*A21;
224 if (ABS(D)>0.) {
225 D=1./D;
226 S11= A22*D;
227 S21=-A21*D;
228 S12=-A12*D;
229 S22= A11*D;
230 x[2*i ]=S11*S1+S12*S2;
231 x[2*i+1]=S21*S1+S22*S2;
232 } else {
233 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
234 }
235 }
236
237 }
238 } else if (n_block==3) {
239 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
240 for (i = 0; i < n; ++i) {
241 if (gs->colorOf[i]==color) {
242 /* x_i=x_i-a_ik*x_k */
243 S1=x[3*i];
244 S2=x[3*i+1];
245 S3=x[3*i+2];
246 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
247 k=gs->pattern->index[iptr_ik];
248 if (gs->colorOf[k]<color) {
249 R1=x[3*k];
250 R2=x[3*k+1];
251 R3=x[3*k+2];
252 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
253 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
254 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
255 }
256 }
257 iptr_main=gs->main_iptr[i];
258 A11=gs->factors->val[iptr_main*9 ];
259 A21=gs->factors->val[iptr_main*9+1];
260 A31=gs->factors->val[iptr_main*9+2];
261 A12=gs->factors->val[iptr_main*9+3];
262 A22=gs->factors->val[iptr_main*9+4];
263 A32=gs->factors->val[iptr_main*9+5];
264 A13=gs->factors->val[iptr_main*9+6];
265 A23=gs->factors->val[iptr_main*9+7];
266 A33=gs->factors->val[iptr_main*9+8];
267 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
268 if (ABS(D)>0.) {
269 D=1./D;
270 S11=(A22*A33-A23*A32)*D;
271 S21=(A31*A23-A21*A33)*D;
272 S31=(A21*A32-A31*A22)*D;
273 S12=(A13*A32-A12*A33)*D;
274 S22=(A11*A33-A31*A13)*D;
275 S32=(A12*A31-A11*A32)*D;
276 S13=(A12*A23-A13*A22)*D;
277 S23=(A13*A21-A11*A23)*D;
278 S33=(A11*A22-A12*A21)*D;
279 /* a_ik=a_ii^{-1}*a_ik */
280 x[3*i ]=S11*S1+S12*S2+S13*S3;
281 x[3*i+1]=S21*S1+S22*S2+S23*S3;
282 x[3*i+2]=S31*S1+S32*S2+S33*S3;
283 } else {
284 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
285 }
286 }
287 }
288 }
289 }
290
291 /* Multipling with diag(A) */
292 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
293
294 /* backward substitution */
295 for (color=(gs->num_colors)-1;color>-1;--color) {
296 if (n_block==1) {
297 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
298 for (i = 0; i < n; ++i) {
299 if (gs->colorOf[i]==color) {
300 /* x_i=x_i-a_ik*x_k */
301 S1=x[i];
302 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
303 k=gs->pattern->index[iptr_ik];
304 if (gs->colorOf[k]>color) {
305 R1=x[k];
306 S1-=gs->factors->val[iptr_ik]*R1;
307 }
308 }
309 /*x[i]=S1;*/
310 iptr_main=gs->main_iptr[i];
311 x[i]=(1/gs->factors->val[iptr_main])*S1;
312 }
313 }
314 } else if (n_block==2) {
315 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
316 for (i = 0; i < n; ++i) {
317 if (gs->colorOf[i]==color) {
318 /* x_i=x_i-a_ik*x_k */
319 S1=x[2*i];
320 S2=x[2*i+1];
321 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
322 k=gs->pattern->index[iptr_ik];
323 if (gs->colorOf[k]>color) {
324 R1=x[2*k];
325 R2=x[2*k+1];
326 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
327 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
328 }
329 }
330 /*x[2*i]=S1;
331 x[2*i+1]=S2;*/
332 iptr_main=gs->main_iptr[i];
333 A11=gs->factors->val[iptr_main*4];
334 A21=gs->factors->val[iptr_main*4+1];
335 A12=gs->factors->val[iptr_main*4+2];
336 A22=gs->factors->val[iptr_main*4+3];
337 D = A11*A22-A12*A21;
338 if (ABS(D)>0.) {
339 D=1./D;
340 S11= A22*D;
341 S21=-A21*D;
342 S12=-A12*D;
343 S22= A11*D;
344 x[2*i ]=S11*S1+S12*S2;
345 x[2*i+1]=S21*S1+S22*S2;
346 } else {
347 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
348 }
349
350 }
351 }
352 } else if (n_block==3) {
353 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
354 for (i = 0; i < n; ++i) {
355 if (gs->colorOf[i]==color) {
356 /* x_i=x_i-a_ik*x_k */
357 S1=x[3*i ];
358 S2=x[3*i+1];
359 S3=x[3*i+2];
360 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
361 k=gs->pattern->index[iptr_ik];
362 if (gs->colorOf[k]>color) {
363 R1=x[3*k];
364 R2=x[3*k+1];
365 R3=x[3*k+2];
366 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
367 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
368 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
369 }
370 }
371 /* x[3*i]=S1;
372 x[3*i+1]=S2;
373 x[3*i+2]=S3;
374 */ iptr_main=gs->main_iptr[i];
375 A11=gs->factors->val[iptr_main*9 ];
376 A21=gs->factors->val[iptr_main*9+1];
377 A31=gs->factors->val[iptr_main*9+2];
378 A12=gs->factors->val[iptr_main*9+3];
379 A22=gs->factors->val[iptr_main*9+4];
380 A32=gs->factors->val[iptr_main*9+5];
381 A13=gs->factors->val[iptr_main*9+6];
382 A23=gs->factors->val[iptr_main*9+7];
383 A33=gs->factors->val[iptr_main*9+8];
384 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
385 if (ABS(D)>0.) {
386 D=1./D;
387 S11=(A22*A33-A23*A32)*D;
388 S21=(A31*A23-A21*A33)*D;
389 S31=(A21*A32-A31*A22)*D;
390 S12=(A13*A32-A12*A33)*D;
391 S22=(A11*A33-A31*A13)*D;
392 S32=(A12*A31-A11*A32)*D;
393 S13=(A12*A23-A13*A22)*D;
394 S23=(A13*A21-A11*A23)*D;
395 S33=(A11*A22-A12*A21)*D;
396 x[3*i ]=S11*S1+S12*S2+S13*S3;
397 x[3*i+1]=S21*S1+S22*S2+S23*S3;
398 x[3*i+2]=S31*S1+S32*S2+S33*S3;
399 } else {
400 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
401 }
402 }
403 }
404 }
405 }
406 return;
407 }
408

  ViewVC Help
Powered by ViewVC 1.1.26