/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3005 - (show annotations)
Thu Apr 22 05:59:31 2010 UTC (8 years, 9 months ago) by gross
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 18408 byte(s)
early call of setPreconditioner in FCT solver removed.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22 /* Author: artak@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 #include <stdio.h>
31
32 /**************************************************************/
33
34 /* free all memory used by GS */
35
36 void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37 if (in!=NULL) {
38 MEMFREE(in->colorOf);
39 Paso_SparseMatrix_free(in->factors);
40 MEMFREE(in->diag);
41 MEMFREE(in->main_iptr);
42 Paso_Pattern_free(in->pattern);
43 MEMFREE(in);
44 }
45 }
46
47 /**************************************************************/
48
49 /* constructs the incomplete block factorization of
50
51 */
52 Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53 dim_t n=A->numRows;
54 dim_t n_block=A->row_block_size;
55 dim_t block_size=A->block_size;
56 register index_t i,iptr_main=0,iPtr;
57 double time0=0,time_color=0,time_fac=0;
58 /* allocations: */
59 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
60 if (Paso_checkPtr(out)) return NULL;
61 out->colorOf=MEMALLOC(n,index_t);
62 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
63 /*out->diag=MEMALLOC(A->len,double);*/
64 out->main_iptr=MEMALLOC(n,index_t);
65 out->pattern=Paso_Pattern_getReference(A->pattern);
66 out->factors=Paso_SparseMatrix_getReference(A);
67 out->n_block=n_block;
68 out->n=n;
69
70 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71 time0=Paso_timer();
72 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
73 time_color=Paso_timer()-time0;
74
75 if (Paso_noError()) {
76 time0=Paso_timer();
77
78 if (! (Paso_checkPtr(out->diag))) {
79 if (n_block==1) {
80 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
81 for (i = 0; i < A->pattern->numOutput; i++) {
82 iptr_main=0;
83 out->diag[i]=1.;
84 /* find main diagonal */
85 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86 if (A->pattern->index[iPtr]==i) {
87 iptr_main=iPtr;
88 out->diag[i]=A->val[iPtr];
89 break;
90 }
91 }
92 out->main_iptr[i]=iptr_main;
93 }
94 } else if (n_block==2) {
95 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
96 for (i = 0; i < A->pattern->numOutput; i++) {
97 out->diag[i*4+0]= 1.;
98 out->diag[i*4+1]= 0.;
99 out->diag[i*4+2]= 0.;
100 out->diag[i*4+3]= 1.;
101 iptr_main=0;
102 /* find main diagonal */
103 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
104 if (A->pattern->index[iPtr]==i) {
105 iptr_main=iPtr;
106 out->diag[i*4]= A->val[iPtr*4];
107 out->diag[i*4+1]=A->val[iPtr*4+1];
108 out->diag[i*4+2]=A->val[iPtr*4+2];
109 out->diag[i*4+3]= A->val[iPtr*4+3];
110 break;
111 }
112 }
113 out->main_iptr[i]=iptr_main;
114 }
115 } else if (n_block==3) {
116 #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
117 for (i = 0; i < A->pattern->numOutput; i++) {
118 out->diag[i*9 ]=1.;
119 out->diag[i*9+1]=0.;
120 out->diag[i*9+2]=0.;
121 out->diag[i*9+3]=0.;
122 out->diag[i*9+4]=1.;
123 out->diag[i*9+5]=0.;
124 out->diag[i*9+6]=0.;
125 out->diag[i*9+7]=0.;
126 out->diag[i*9+8]=1.;
127 iptr_main=0;
128 /* find main diagonal */
129 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
130 if (A->pattern->index[iPtr]==i) {
131 iptr_main=iPtr;
132 out->diag[i*9 ]=A->val[iPtr*9 ];
133 out->diag[i*9+1]=A->val[iPtr*9+1];
134 out->diag[i*9+2]=A->val[iPtr*9+2];
135 out->diag[i*9+3]=A->val[iPtr*9+3];
136 out->diag[i*9+4]=A->val[iPtr*9+4];
137 out->diag[i*9+5]=A->val[iPtr*9+5];
138 out->diag[i*9+6]=A->val[iPtr*9+6];
139 out->diag[i*9+7]=A->val[iPtr*9+7];
140 out->diag[i*9+8]=A->val[iPtr*9+8];
141 break;
142 }
143 }
144 out->main_iptr[i]=iptr_main;
145 }
146 }
147 }
148
149 time_fac=Paso_timer()-time0;
150 }
151 }
152 if (Paso_noError()) {
153 if (verbose) {
154 printf("GS: %d color used \n",out->num_colors);
155 printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
156 }
157 return out;
158 } else {
159 Paso_Solver_GS_free(out);
160 return NULL;
161 }
162 }
163
164 /**************************************************************/
165
166 /* apply GS precondition b-> x
167
168 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
169
170 should be called within a parallel region
171 barrier synconization should be performed to make sure that the input vector available
172
173 */
174
175 void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
176 register dim_t i,k;
177 register index_t color,iptr_ik,iptr_main;
178 register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
179 dim_t n_block=gs->n_block;
180 dim_t n=gs->n;
181 index_t* pivot=NULL;
182
183 /* copy x into b*/
184 #pragma omp parallel for private(i) schedule(static)
185 for (i=0;i<n*n_block;++i) x[i]=b[i];
186 /* forward substitution */
187 for (color=0;color<gs->num_colors;++color) {
188 if (n_block==1) {
189 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
190 for (i = 0; i < n; ++i) {
191 if (gs->colorOf[i]==color) {
192 /* x_i=x_i-a_ik*x_k */
193 S1=x[i];
194 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
195 k=gs->pattern->index[iptr_ik];
196 if (gs->colorOf[k]<color) {
197 R1=x[k];
198 S1-=gs->factors->val[iptr_ik]*R1;
199 }
200 }
201 iptr_main=gs->main_iptr[i];
202 x[i]=(1./gs->factors->val[iptr_main])*S1;
203 }
204 }
205 } else if (n_block==2) {
206 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2,A11,A21,A12,A22,D,S11,S21,S12,S22)
207 for (i = 0; i < n; ++i) {
208 if (gs->colorOf[i]==color) {
209 /* x_i=x_i-a_ik*x_k */
210 S1=x[2*i];
211 S2=x[2*i+1];
212 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
213 k=gs->pattern->index[iptr_ik];
214 if (gs->colorOf[k]<color) {
215 R1=x[2*k];
216 R2=x[2*k+1];
217 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
218 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
219 }
220 }
221 iptr_main=gs->main_iptr[i];
222 A11=gs->factors->val[iptr_main*4];
223 A21=gs->factors->val[iptr_main*4+1];
224 A12=gs->factors->val[iptr_main*4+2];
225 A22=gs->factors->val[iptr_main*4+3];
226 D = A11*A22-A12*A21;
227 if (ABS(D)>0.) {
228 D=1./D;
229 S11= A22*D;
230 S21=-A21*D;
231 S12=-A12*D;
232 S22= A11*D;
233 x[2*i ]=S11*S1+S12*S2;
234 x[2*i+1]=S21*S1+S22*S2;
235 } else {
236 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
237 }
238 }
239
240 }
241 } else if (n_block==3) {
242 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3,A11,A21,A31,A12,A22,A32,A13,A23,A33,D,S11,S21,S31,S12,S22,S32,S13,S23,S33)
243 for (i = 0; i < n; ++i) {
244 if (gs->colorOf[i]==color) {
245 /* x_i=x_i-a_ik*x_k */
246 S1=x[3*i];
247 S2=x[3*i+1];
248 S3=x[3*i+2];
249 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
250 k=gs->pattern->index[iptr_ik];
251 if (gs->colorOf[k]<color) {
252 R1=x[3*k];
253 R2=x[3*k+1];
254 R3=x[3*k+2];
255 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
256 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
257 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
258 }
259 }
260 iptr_main=gs->main_iptr[i];
261 A11=gs->factors->val[iptr_main*9 ];
262 A21=gs->factors->val[iptr_main*9+1];
263 A31=gs->factors->val[iptr_main*9+2];
264 A12=gs->factors->val[iptr_main*9+3];
265 A22=gs->factors->val[iptr_main*9+4];
266 A32=gs->factors->val[iptr_main*9+5];
267 A13=gs->factors->val[iptr_main*9+6];
268 A23=gs->factors->val[iptr_main*9+7];
269 A33=gs->factors->val[iptr_main*9+8];
270 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
271 if (ABS(D)>0.) {
272 D=1./D;
273 S11=(A22*A33-A23*A32)*D;
274 S21=(A31*A23-A21*A33)*D;
275 S31=(A21*A32-A31*A22)*D;
276 S12=(A13*A32-A12*A33)*D;
277 S22=(A11*A33-A31*A13)*D;
278 S32=(A12*A31-A11*A32)*D;
279 S13=(A12*A23-A13*A22)*D;
280 S23=(A13*A21-A11*A23)*D;
281 S33=(A11*A22-A12*A21)*D;
282 /* a_ik=a_ii^{-1}*a_ik */
283 x[3*i ]=S11*S1+S12*S2+S13*S3;
284 x[3*i+1]=S21*S1+S22*S2+S23*S3;
285 x[3*i+2]=S31*S1+S32*S2+S33*S3;
286 } else {
287 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
288 }
289 }
290 }
291 }
292 }
293
294 /* Multipling with diag(A) */
295 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
296
297 /* backward substitution */
298 for (color=(gs->num_colors)-1;color>-1;--color) {
299 if (n_block==1) {
300 /*#pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)*/
301 for (i = 0; i < n; ++i) {
302 if (gs->colorOf[i]==color) {
303 /* x_i=x_i-a_ik*x_k */
304 S1=x[i];
305 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
306 k=gs->pattern->index[iptr_ik];
307 if (gs->colorOf[k]>color) {
308 R1=x[k];
309 S1-=gs->factors->val[iptr_ik]*R1;
310 }
311 }
312 /*x[i]=S1;*/
313 iptr_main=gs->main_iptr[i];
314 x[i]=(1/gs->factors->val[iptr_main])*S1;
315 }
316 }
317 } else if (n_block==2) {
318 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2,iptr_main,D,A11,A21,A12,A22,S11,S21,S12,S22)
319 for (i = 0; i < n; ++i) {
320 if (gs->colorOf[i]==color) {
321 /* x_i=x_i-a_ik*x_k */
322 S1=x[2*i];
323 S2=x[2*i+1];
324 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
325 k=gs->pattern->index[iptr_ik];
326 if (gs->colorOf[k]>color) {
327 R1=x[2*k];
328 R2=x[2*k+1];
329 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
330 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
331 }
332 }
333 /*x[2*i]=S1;
334 x[2*i+1]=S2;*/
335 iptr_main=gs->main_iptr[i];
336 A11=gs->factors->val[iptr_main*4];
337 A21=gs->factors->val[iptr_main*4+1];
338 A12=gs->factors->val[iptr_main*4+2];
339 A22=gs->factors->val[iptr_main*4+3];
340 D = A11*A22-A12*A21;
341 if (ABS(D)>0.) {
342 D=1./D;
343 S11= A22*D;
344 S21=-A21*D;
345 S12=-A12*D;
346 S22= A11*D;
347 x[2*i ]=S11*S1+S12*S2;
348 x[2*i+1]=S21*S1+S22*S2;
349 } else {
350 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
351 }
352
353 }
354 }
355 } else if (n_block==3) {
356 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3,iptr_main,D,A11,A21,A31,A12,A22,A32,A13,A23,A33,S11,S21,S31,S12,S22,S32,S13,S23,S33)
357 for (i = 0; i < n; ++i) {
358 if (gs->colorOf[i]==color) {
359 /* x_i=x_i-a_ik*x_k */
360 S1=x[3*i ];
361 S2=x[3*i+1];
362 S3=x[3*i+2];
363 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
364 k=gs->pattern->index[iptr_ik];
365 if (gs->colorOf[k]>color) {
366 R1=x[3*k];
367 R2=x[3*k+1];
368 R3=x[3*k+2];
369 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
370 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
371 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
372 }
373 }
374 /* x[3*i]=S1;
375 x[3*i+1]=S2;
376 x[3*i+2]=S3;
377 */ iptr_main=gs->main_iptr[i];
378 A11=gs->factors->val[iptr_main*9 ];
379 A21=gs->factors->val[iptr_main*9+1];
380 A31=gs->factors->val[iptr_main*9+2];
381 A12=gs->factors->val[iptr_main*9+3];
382 A22=gs->factors->val[iptr_main*9+4];
383 A32=gs->factors->val[iptr_main*9+5];
384 A13=gs->factors->val[iptr_main*9+6];
385 A23=gs->factors->val[iptr_main*9+7];
386 A33=gs->factors->val[iptr_main*9+8];
387 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
388 if (ABS(D)>0.) {
389 D=1./D;
390 S11=(A22*A33-A23*A32)*D;
391 S21=(A31*A23-A21*A33)*D;
392 S31=(A21*A32-A31*A22)*D;
393 S12=(A13*A32-A12*A33)*D;
394 S22=(A11*A33-A31*A13)*D;
395 S32=(A12*A31-A11*A32)*D;
396 S13=(A12*A23-A13*A22)*D;
397 S23=(A13*A21-A11*A23)*D;
398 S33=(A11*A22-A12*A21)*D;
399 x[3*i ]=S11*S1+S12*S2+S13*S3;
400 x[3*i+1]=S21*S1+S22*S2+S23*S3;
401 x[3*i+2]=S31*S1+S32*S2+S33*S3;
402 } else {
403 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
404 }
405 }
406 }
407 }
408 }
409 return;
410 }
411

  ViewVC Help
Powered by ViewVC 1.1.26