/[escript]/branches/more_shared_ptrs_from_1812/paso/src/Solver_GS.c
ViewVC logotype

Contents of /branches/more_shared_ptrs_from_1812/paso/src/Solver_GS.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1851 - (show annotations)
Mon Oct 6 03:16:43 2008 UTC (10 years, 5 months ago) by jfenwick
File MIME type: text/plain
File size: 18113 byte(s)
Branch commit.
Added files while the merge op did not add.
Modified shake59 config for non-ken users.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22 /* Author: artak@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 #include <stdio.h>
31
32 /**************************************************************/
33
34 /* free all memory used by GS */
35
36 void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37 if (in!=NULL) {
38 MEMFREE(in->colorOf);
39 Paso_SparseMatrix_free(in->factors);
40 MEMFREE(in->diag);
41 MEMFREE(in->main_iptr);
42 Paso_Pattern_free(in->pattern);
43 MEMFREE(in);
44 }
45 }
46
47 /**************************************************************/
48
49 /* constructs the incomplete block factorization of
50
51 */
52 Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53 dim_t n=A->numRows;
54 dim_t n_block=A->row_block_size;
55 dim_t block_size=A->block_size;
56 index_t num_colors=0, *mis_marker=NULL;
57 register index_t i,iptr_main,iPtr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
58 double time0,time_color,time_fac;
59 /* allocations: */
60 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
61 if (Paso_checkPtr(out)) return NULL;
62 out->colorOf=MEMALLOC(n,index_t);
63 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
64 /*out->diag=MEMALLOC(A->len,double);*/
65 out->main_iptr=MEMALLOC(n,index_t);
66 out->pattern=Paso_Pattern_getReference(A->pattern);
67 out->factors=Paso_SparseMatrix_getReference(A);
68 out->n_block=n_block;
69 out->n=n;
70
71 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72 time0=Paso_timer();
73 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
74 time_color=Paso_timer()-time0;
75
76 if (Paso_noError()) {
77 time0=Paso_timer();
78
79 if (! (Paso_checkPtr(out->diag))) {
80 if (n_block==1) {
81 #pragma omp parallel for private(i, iPtr) schedule(static)
82 for (i = 0; i < A->pattern->numOutput; i++) {
83 out->diag[i]=1.;
84 /* find main diagonal */
85 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86 if (A->pattern->index[iPtr]==i) {
87 iptr_main=iPtr;
88 out->diag[i]=A->val[iPtr];
89 break;
90 }
91 }
92 out->main_iptr[i]=iptr_main;
93 }
94 } else if (n_block==2) {
95 #pragma omp parallel for private(i, iPtr) schedule(static)
96 for (i = 0; i < A->pattern->numOutput; i++) {
97 out->diag[i*4+0]= 1.;
98 out->diag[i*4+1]= 0.;
99 out->diag[i*4+2]= 0.;
100 out->diag[i*4+3]= 1.;
101 /* find main diagonal */
102 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
103 if (A->pattern->index[iPtr]==i) {
104 iptr_main=iPtr;
105 out->diag[i*4]= A->val[iPtr*4];
106 out->diag[i*4+1]=A->val[iPtr*4+1];
107 out->diag[i*4+2]=A->val[iPtr*4+2];
108 out->diag[i*4+3]= A->val[iPtr*4+3];
109 break;
110 }
111 }
112 out->main_iptr[i]=iptr_main;
113 }
114 } else if (n_block==3) {
115 #pragma omp parallel for private(i, iPtr) schedule(static)
116 for (i = 0; i < A->pattern->numOutput; i++) {
117 out->diag[i*9 ]=1.;
118 out->diag[i*9+1]=0.;
119 out->diag[i*9+2]=0.;
120 out->diag[i*9+3]=0.;
121 out->diag[i*9+4]=1.;
122 out->diag[i*9+5]=0.;
123 out->diag[i*9+6]=0.;
124 out->diag[i*9+7]=0.;
125 out->diag[i*9+8]=1.;
126 /* find main diagonal */
127 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
128 if (A->pattern->index[iPtr]==i) {
129 iptr_main=iPtr;
130 out->diag[i*9 ]=A->val[iPtr*9 ];
131 out->diag[i*9+1]=A->val[iPtr*9+1];
132 out->diag[i*9+2]=A->val[iPtr*9+2];
133 out->diag[i*9+3]=A->val[iPtr*9+3];
134 out->diag[i*9+4]=A->val[iPtr*9+4];
135 out->diag[i*9+5]=A->val[iPtr*9+5];
136 out->diag[i*9+6]=A->val[iPtr*9+6];
137 out->diag[i*9+7]=A->val[iPtr*9+7];
138 out->diag[i*9+8]=A->val[iPtr*9+8];
139 break;
140 }
141 }
142 out->main_iptr[i]=iptr_main;
143 }
144 }
145 }
146
147 time_fac=Paso_timer()-time0;
148 }
149 }
150 if (Paso_noError()) {
151 if (verbose) {
152 printf("GS: %d color used \n",out->num_colors);
153 printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
154 }
155 return out;
156 } else {
157 Paso_Solver_GS_free(out);
158 return NULL;
159 }
160 }
161
162 /**************************************************************/
163
164 /* apply GS precondition b-> x
165
166 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
167
168 should be called within a parallel region
169 barrier synconization should be performed to make sure that the input vector available
170
171 */
172
173 void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
174 register dim_t i,k;
175 register index_t color,iptr_ik,iptr_main;
176 register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
177 dim_t n_block=gs->n_block;
178 dim_t n=gs->n;
179 index_t* pivot=NULL;
180
181 /* copy x into b*/
182 #pragma omp parallel for private(i) schedule(static)
183 for (i=0;i<n*n_block;++i) x[i]=b[i];
184 /* forward substitution */
185 for (color=0;color<gs->num_colors;++color) {
186 if (n_block==1) {
187 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
188 for (i = 0; i < n; ++i) {
189 if (gs->colorOf[i]==color) {
190 /* x_i=x_i-a_ik*x_k */
191 S1=x[i];
192 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
193 k=gs->pattern->index[iptr_ik];
194 if (gs->colorOf[k]<color) {
195 R1=x[k];
196 S1-=gs->factors->val[iptr_ik]*R1;
197 }
198 }
199 iptr_main=gs->main_iptr[i];
200 x[i]=(1/gs->factors->val[iptr_main])*S1;
201 }
202 }
203 } else if (n_block==2) {
204 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
205 for (i = 0; i < n; ++i) {
206 if (gs->colorOf[i]==color) {
207 /* x_i=x_i-a_ik*x_k */
208 S1=x[2*i];
209 S2=x[2*i+1];
210 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
211 k=gs->pattern->index[iptr_ik];
212 if (gs->colorOf[k]<color) {
213 R1=x[2*k];
214 R2=x[2*k+1];
215 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
216 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
217 }
218 }
219 iptr_main=gs->main_iptr[i];
220 A11=gs->factors->val[iptr_main*4];
221 A21=gs->factors->val[iptr_main*4+1];
222 A12=gs->factors->val[iptr_main*4+2];
223 A22=gs->factors->val[iptr_main*4+3];
224 D = A11*A22-A12*A21;
225 if (ABS(D)>0.) {
226 D=1./D;
227 S11= A22*D;
228 S21=-A21*D;
229 S12=-A12*D;
230 S22= A11*D;
231 x[2*i ]=S11*S1+S12*S2;
232 x[2*i+1]=S21*S1+S22*S2;
233 } else {
234 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
235 }
236 }
237
238 }
239 } else if (n_block==3) {
240 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
241 for (i = 0; i < n; ++i) {
242 if (gs->colorOf[i]==color) {
243 /* x_i=x_i-a_ik*x_k */
244 S1=x[3*i];
245 S2=x[3*i+1];
246 S3=x[3*i+2];
247 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
248 k=gs->pattern->index[iptr_ik];
249 if (gs->colorOf[k]<color) {
250 R1=x[3*k];
251 R2=x[3*k+1];
252 R3=x[3*k+2];
253 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
254 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
255 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
256 }
257 }
258 iptr_main=gs->main_iptr[i];
259 A11=gs->factors->val[iptr_main*9 ];
260 A21=gs->factors->val[iptr_main*9+1];
261 A31=gs->factors->val[iptr_main*9+2];
262 A12=gs->factors->val[iptr_main*9+3];
263 A22=gs->factors->val[iptr_main*9+4];
264 A32=gs->factors->val[iptr_main*9+5];
265 A13=gs->factors->val[iptr_main*9+6];
266 A23=gs->factors->val[iptr_main*9+7];
267 A33=gs->factors->val[iptr_main*9+8];
268 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
269 if (ABS(D)>0.) {
270 D=1./D;
271 S11=(A22*A33-A23*A32)*D;
272 S21=(A31*A23-A21*A33)*D;
273 S31=(A21*A32-A31*A22)*D;
274 S12=(A13*A32-A12*A33)*D;
275 S22=(A11*A33-A31*A13)*D;
276 S32=(A12*A31-A11*A32)*D;
277 S13=(A12*A23-A13*A22)*D;
278 S23=(A13*A21-A11*A23)*D;
279 S33=(A11*A22-A12*A21)*D;
280 /* a_ik=a_ii^{-1}*a_ik */
281 x[3*i ]=S11*S1+S12*S2+S13*S3;
282 x[3*i+1]=S21*S1+S22*S2+S23*S3;
283 x[3*i+2]=S31*S1+S32*S2+S33*S3;
284 } else {
285 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
286 }
287 }
288 }
289 }
290 }
291
292 /* Multipling with diag(A) */
293 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
294
295 /* backward substitution */
296 for (color=(gs->num_colors)-1;color>-1;--color) {
297 if (n_block==1) {
298 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
299 for (i = 0; i < n; ++i) {
300 if (gs->colorOf[i]==color) {
301 /* x_i=x_i-a_ik*x_k */
302 S1=x[i];
303 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
304 k=gs->pattern->index[iptr_ik];
305 if (gs->colorOf[k]>color) {
306 R1=x[k];
307 S1-=gs->factors->val[iptr_ik]*R1;
308 }
309 }
310 /*x[i]=S1;*/
311 iptr_main=gs->main_iptr[i];
312 x[i]=(1/gs->factors->val[iptr_main])*S1;
313 }
314 }
315 } else if (n_block==2) {
316 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
317 for (i = 0; i < n; ++i) {
318 if (gs->colorOf[i]==color) {
319 /* x_i=x_i-a_ik*x_k */
320 S1=x[2*i];
321 S2=x[2*i+1];
322 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
323 k=gs->pattern->index[iptr_ik];
324 if (gs->colorOf[k]>color) {
325 R1=x[2*k];
326 R2=x[2*k+1];
327 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
328 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
329 }
330 }
331 /*x[2*i]=S1;
332 x[2*i+1]=S2;*/
333 iptr_main=gs->main_iptr[i];
334 A11=gs->factors->val[iptr_main*4];
335 A21=gs->factors->val[iptr_main*4+1];
336 A12=gs->factors->val[iptr_main*4+2];
337 A22=gs->factors->val[iptr_main*4+3];
338 D = A11*A22-A12*A21;
339 if (ABS(D)>0.) {
340 D=1./D;
341 S11= A22*D;
342 S21=-A21*D;
343 S12=-A12*D;
344 S22= A11*D;
345 x[2*i ]=S11*S1+S12*S2;
346 x[2*i+1]=S21*S1+S22*S2;
347 } else {
348 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
349 }
350
351 }
352 }
353 } else if (n_block==3) {
354 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
355 for (i = 0; i < n; ++i) {
356 if (gs->colorOf[i]==color) {
357 /* x_i=x_i-a_ik*x_k */
358 S1=x[3*i ];
359 S2=x[3*i+1];
360 S3=x[3*i+2];
361 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
362 k=gs->pattern->index[iptr_ik];
363 if (gs->colorOf[k]>color) {
364 R1=x[3*k];
365 R2=x[3*k+1];
366 R3=x[3*k+2];
367 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
368 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
369 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
370 }
371 }
372 /* x[3*i]=S1;
373 x[3*i+1]=S2;
374 x[3*i+2]=S3;
375 */ iptr_main=gs->main_iptr[i];
376 A11=gs->factors->val[iptr_main*9 ];
377 A21=gs->factors->val[iptr_main*9+1];
378 A31=gs->factors->val[iptr_main*9+2];
379 A12=gs->factors->val[iptr_main*9+3];
380 A22=gs->factors->val[iptr_main*9+4];
381 A32=gs->factors->val[iptr_main*9+5];
382 A13=gs->factors->val[iptr_main*9+6];
383 A23=gs->factors->val[iptr_main*9+7];
384 A33=gs->factors->val[iptr_main*9+8];
385 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
386 if (ABS(D)>0.) {
387 D=1./D;
388 S11=(A22*A33-A23*A32)*D;
389 S21=(A31*A23-A21*A33)*D;
390 S31=(A21*A32-A31*A22)*D;
391 S12=(A13*A32-A12*A33)*D;
392 S22=(A11*A33-A31*A13)*D;
393 S32=(A12*A31-A11*A32)*D;
394 S13=(A12*A23-A13*A22)*D;
395 S23=(A13*A21-A11*A23)*D;
396 S33=(A11*A22-A12*A21)*D;
397 x[3*i ]=S11*S1+S12*S2+S13*S3;
398 x[3*i+1]=S21*S1+S22*S2+S23*S3;
399 x[3*i+2]=S31*S1+S32*S2+S33*S3;
400 } else {
401 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
402 }
403 }
404 }
405 }
406 }
407
408 return;
409 }
410

  ViewVC Help
Powered by ViewVC 1.1.26