/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1917 - (show annotations)
Thu Oct 23 09:09:31 2008 UTC (10 years, 4 months ago) by phornby
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 30249 byte(s)
Remove unused vars, do a bit of standardising of indentation, and use the new Pattern_coupling.h to eliminate some implicit function declarations in Solver_AMG.c
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22 /* Author: artak@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "Solver.h"
28 #include "PasoUtil.h"
29
30 #include <stdio.h>
31
32 /**************************************************************/
33
34 /* free all memory used by GS */
35
36 void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37 if (in!=NULL) {
38 MEMFREE(in->colorOf);
39 Paso_SparseMatrix_free(in->factors);
40 MEMFREE(in->diag);
41 MEMFREE(in->main_iptr);
42 Paso_Pattern_free(in->pattern);
43 MEMFREE(in);
44 }
45 }
46
47 /**************************************************************/
48
49 /* constructs the incomplete block factorization of
50
51 */
52 Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53 dim_t n=A->numRows;
54 dim_t n_block=A->row_block_size;
55 dim_t block_size=A->block_size;
56 index_t num_colors=0, *mis_marker=NULL;
57 register index_t i,iptr_main,iPtr;
58 double time0,time_color,time_fac;
59 /* allocations: */
60 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
61 if (Paso_checkPtr(out)) return NULL;
62 out->colorOf=MEMALLOC(n,index_t);
63 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
64 /*out->diag=MEMALLOC(A->len,double);*/
65 out->main_iptr=MEMALLOC(n,index_t);
66 out->pattern=Paso_Pattern_getReference(A->pattern);
67 out->factors=Paso_SparseMatrix_getReference(A);
68 out->n_block=n_block;
69 out->n=n;
70
71 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72 time0=Paso_timer();
73 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
74 time_color=Paso_timer()-time0;
75
76 if (Paso_noError()) {
77 time0=Paso_timer();
78
79 if (! (Paso_checkPtr(out->diag))) {
80 if (n_block==1) {
81 #pragma omp parallel for private(i, iPtr) schedule(static)
82 for (i = 0; i < A->pattern->numOutput; i++) {
83 out->diag[i]=1.;
84 /* find main diagonal */
85 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86 if (A->pattern->index[iPtr]==i) {
87 iptr_main=iPtr;
88 out->diag[i]=A->val[iPtr];
89 break;
90 }
91 }
92 out->main_iptr[i]=iptr_main;
93 }
94 } else if (n_block==2) {
95 #pragma omp parallel for private(i, iPtr) schedule(static)
96 for (i = 0; i < A->pattern->numOutput; i++) {
97 out->diag[i*4+0]= 1.;
98 out->diag[i*4+1]= 0.;
99 out->diag[i*4+2]= 0.;
100 out->diag[i*4+3]= 1.;
101 /* find main diagonal */
102 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
103 if (A->pattern->index[iPtr]==i) {
104 iptr_main=iPtr;
105 out->diag[i*4]= A->val[iPtr*4];
106 out->diag[i*4+1]=A->val[iPtr*4+1];
107 out->diag[i*4+2]=A->val[iPtr*4+2];
108 out->diag[i*4+3]= A->val[iPtr*4+3];
109 break;
110 }
111 }
112 out->main_iptr[i]=iptr_main;
113 }
114 } else if (n_block==3) {
115 #pragma omp parallel for private(i, iPtr) schedule(static)
116 for (i = 0; i < A->pattern->numOutput; i++) {
117 out->diag[i*9 ]=1.;
118 out->diag[i*9+1]=0.;
119 out->diag[i*9+2]=0.;
120 out->diag[i*9+3]=0.;
121 out->diag[i*9+4]=1.;
122 out->diag[i*9+5]=0.;
123 out->diag[i*9+6]=0.;
124 out->diag[i*9+7]=0.;
125 out->diag[i*9+8]=1.;
126 /* find main diagonal */
127 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
128 if (A->pattern->index[iPtr]==i) {
129 iptr_main=iPtr;
130 out->diag[i*9 ]=A->val[iPtr*9 ];
131 out->diag[i*9+1]=A->val[iPtr*9+1];
132 out->diag[i*9+2]=A->val[iPtr*9+2];
133 out->diag[i*9+3]=A->val[iPtr*9+3];
134 out->diag[i*9+4]=A->val[iPtr*9+4];
135 out->diag[i*9+5]=A->val[iPtr*9+5];
136 out->diag[i*9+6]=A->val[iPtr*9+6];
137 out->diag[i*9+7]=A->val[iPtr*9+7];
138 out->diag[i*9+8]=A->val[iPtr*9+8];
139 break;
140 }
141 }
142 out->main_iptr[i]=iptr_main;
143 }
144 }
145 }
146
147 time_fac=Paso_timer()-time0;
148 }
149 }
150 if (Paso_noError()) {
151 if (verbose) {
152 printf("GS: %d color used \n",out->num_colors);
153 printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
154 }
155 return out;
156 } else {
157 Paso_Solver_GS_free(out);
158 return NULL;
159 }
160 }
161
162 /**************************************************************/
163
164 /* apply GS precondition b-> x
165
166 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
167
168 should be called within a parallel region
169 barrier synconization should be performed to make sure that the input vector available
170
171 */
172
173 void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
174 register dim_t i,k;
175 register index_t color,iptr_ik,iptr_main;
176 register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
177 dim_t n_block=gs->n_block;
178 dim_t n=gs->n;
179 index_t* pivot=NULL;
180
181 /* copy x into b*/
182 #pragma omp parallel for private(i) schedule(static)
183 for (i=0;i<n*n_block;++i) x[i]=b[i];
184 /* forward substitution */
185 for (color=0;color<gs->num_colors;++color) {
186 if (n_block==1) {
187 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
188 for (i = 0; i < n; ++i) {
189 if (gs->colorOf[i]==color) {
190 /* x_i=x_i-a_ik*x_k */
191 S1=x[i];
192 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
193 k=gs->pattern->index[iptr_ik];
194 if (gs->colorOf[k]<color) {
195 R1=x[k];
196 S1-=gs->factors->val[iptr_ik]*R1;
197 }
198 }
199 iptr_main=gs->main_iptr[i];
200 x[i]=(1/gs->factors->val[iptr_main])*S1;
201 }
202 }
203 } else if (n_block==2) {
204 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
205 for (i = 0; i < n; ++i) {
206 if (gs->colorOf[i]==color) {
207 /* x_i=x_i-a_ik*x_k */
208 S1=x[2*i];
209 S2=x[2*i+1];
210 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
211 k=gs->pattern->index[iptr_ik];
212 if (gs->colorOf[k]<color) {
213 R1=x[2*k];
214 R2=x[2*k+1];
215 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
216 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
217 }
218 }
219 iptr_main=gs->main_iptr[i];
220 A11=gs->factors->val[iptr_main*4];
221 A21=gs->factors->val[iptr_main*4+1];
222 A12=gs->factors->val[iptr_main*4+2];
223 A22=gs->factors->val[iptr_main*4+3];
224 D = A11*A22-A12*A21;
225 if (ABS(D)>0.) {
226 D=1./D;
227 S11= A22*D;
228 S21=-A21*D;
229 S12=-A12*D;
230 S22= A11*D;
231 x[2*i ]=S11*S1+S12*S2;
232 x[2*i+1]=S21*S1+S22*S2;
233 } else {
234 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
235 }
236 }
237
238 }
239 } else if (n_block==3) {
240 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
241 for (i = 0; i < n; ++i) {
242 if (gs->colorOf[i]==color) {
243 /* x_i=x_i-a_ik*x_k */
244 S1=x[3*i];
245 S2=x[3*i+1];
246 S3=x[3*i+2];
247 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
248 k=gs->pattern->index[iptr_ik];
249 if (gs->colorOf[k]<color) {
250 R1=x[3*k];
251 R2=x[3*k+1];
252 R3=x[3*k+2];
253 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
254 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
255 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
256 }
257 }
258 iptr_main=gs->main_iptr[i];
259 A11=gs->factors->val[iptr_main*9 ];
260 A21=gs->factors->val[iptr_main*9+1];
261 A31=gs->factors->val[iptr_main*9+2];
262 A12=gs->factors->val[iptr_main*9+3];
263 A22=gs->factors->val[iptr_main*9+4];
264 A32=gs->factors->val[iptr_main*9+5];
265 A13=gs->factors->val[iptr_main*9+6];
266 A23=gs->factors->val[iptr_main*9+7];
267 A33=gs->factors->val[iptr_main*9+8];
268 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
269 if (ABS(D)>0.) {
270 D=1./D;
271 S11=(A22*A33-A23*A32)*D;
272 S21=(A31*A23-A21*A33)*D;
273 S31=(A21*A32-A31*A22)*D;
274 S12=(A13*A32-A12*A33)*D;
275 S22=(A11*A33-A31*A13)*D;
276 S32=(A12*A31-A11*A32)*D;
277 S13=(A12*A23-A13*A22)*D;
278 S23=(A13*A21-A11*A23)*D;
279 S33=(A11*A22-A12*A21)*D;
280 /* a_ik=a_ii^{-1}*a_ik */
281 x[3*i ]=S11*S1+S12*S2+S13*S3;
282 x[3*i+1]=S21*S1+S22*S2+S23*S3;
283 x[3*i+2]=S31*S1+S32*S2+S33*S3;
284 } else {
285 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
286 }
287 }
288 }
289 }
290 }
291
292 /* Multipling with diag(A) */
293 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
294
295 /* backward substitution */
296 for (color=(gs->num_colors)-1;color>-1;--color) {
297 if (n_block==1) {
298 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
299 for (i = 0; i < n; ++i) {
300 if (gs->colorOf[i]==color) {
301 /* x_i=x_i-a_ik*x_k */
302 S1=x[i];
303 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
304 k=gs->pattern->index[iptr_ik];
305 if (gs->colorOf[k]>color) {
306 R1=x[k];
307 S1-=gs->factors->val[iptr_ik]*R1;
308 }
309 }
310 /*x[i]=S1;*/
311 iptr_main=gs->main_iptr[i];
312 x[i]=(1/gs->factors->val[iptr_main])*S1;
313 }
314 }
315 } else if (n_block==2) {
316 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
317 for (i = 0; i < n; ++i) {
318 if (gs->colorOf[i]==color) {
319 /* x_i=x_i-a_ik*x_k */
320 S1=x[2*i];
321 S2=x[2*i+1];
322 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
323 k=gs->pattern->index[iptr_ik];
324 if (gs->colorOf[k]>color) {
325 R1=x[2*k];
326 R2=x[2*k+1];
327 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
328 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
329 }
330 }
331 /*x[2*i]=S1;
332 x[2*i+1]=S2;*/
333 iptr_main=gs->main_iptr[i];
334 A11=gs->factors->val[iptr_main*4];
335 A21=gs->factors->val[iptr_main*4+1];
336 A12=gs->factors->val[iptr_main*4+2];
337 A22=gs->factors->val[iptr_main*4+3];
338 D = A11*A22-A12*A21;
339 if (ABS(D)>0.) {
340 D=1./D;
341 S11= A22*D;
342 S21=-A21*D;
343 S12=-A12*D;
344 S22= A11*D;
345 x[2*i ]=S11*S1+S12*S2;
346 x[2*i+1]=S21*S1+S22*S2;
347 } else {
348 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
349 }
350
351 }
352 }
353 } else if (n_block==3) {
354 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
355 for (i = 0; i < n; ++i) {
356 if (gs->colorOf[i]==color) {
357 /* x_i=x_i-a_ik*x_k */
358 S1=x[3*i ];
359 S2=x[3*i+1];
360 S3=x[3*i+2];
361 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
362 k=gs->pattern->index[iptr_ik];
363 if (gs->colorOf[k]>color) {
364 R1=x[3*k];
365 R2=x[3*k+1];
366 R3=x[3*k+2];
367 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
368 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
369 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
370 }
371 }
372 /* x[3*i]=S1;
373 x[3*i+1]=S2;
374 x[3*i+2]=S3;
375 */ iptr_main=gs->main_iptr[i];
376 A11=gs->factors->val[iptr_main*9 ];
377 A21=gs->factors->val[iptr_main*9+1];
378 A31=gs->factors->val[iptr_main*9+2];
379 A12=gs->factors->val[iptr_main*9+3];
380 A22=gs->factors->val[iptr_main*9+4];
381 A32=gs->factors->val[iptr_main*9+5];
382 A13=gs->factors->val[iptr_main*9+6];
383 A23=gs->factors->val[iptr_main*9+7];
384 A33=gs->factors->val[iptr_main*9+8];
385 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
386 if (ABS(D)>0.) {
387 D=1./D;
388 S11=(A22*A33-A23*A32)*D;
389 S21=(A31*A23-A21*A33)*D;
390 S31=(A21*A32-A31*A22)*D;
391 S12=(A13*A32-A12*A33)*D;
392 S22=(A11*A33-A31*A13)*D;
393 S32=(A12*A31-A11*A32)*D;
394 S13=(A12*A23-A13*A22)*D;
395 S23=(A13*A21-A11*A23)*D;
396 S33=(A11*A22-A12*A21)*D;
397 x[3*i ]=S11*S1+S12*S2+S13*S3;
398 x[3*i+1]=S21*S1+S22*S2+S23*S3;
399 x[3*i+2]=S31*S1+S32*S2+S33*S3;
400 } else {
401 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
402 }
403 }
404 }
405 }
406 }
407
408 return;
409 }
410
411 /**************************************************************/
412
413 /* apply GS precondition b-> x
414
415 in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
416
417 should be called within a parallel region
418 barrier synconization should be performed to make sure that the input vector available
419
420 */
421
422 void Paso_Solver_solveGS1(Paso_Solver_GS * gs, double * x, double * b) {
423 register dim_t i,k;
424 register index_t color,iptr_ik,iptr_main;
425 register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
426 dim_t n_block=gs->n_block;
427 dim_t n=gs->n;
428 index_t* pivot=NULL;
429
430 /* copy x into b*/
431 #pragma omp parallel for private(i) schedule(static)
432 /*for (i=0;i<n*n_block;++i) x[i]=b[i];*/
433
434 /* forward substitution */
435 for (color=0;color<gs->num_colors;++color) {
436 if (n_block==1) {
437 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
438 for (i = 0; i < n; ++i) {
439 if (gs->colorOf[i]==color) {
440 /* x_i=x_i-a_ik*x_k */
441 S1=b[i];
442 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
443 k=gs->pattern->index[iptr_ik];
444 if (gs->colorOf[k]<color) {
445 R1=x[k];
446 S1-=gs->factors->val[iptr_ik]*R1;
447 }
448 }
449 iptr_main=gs->main_iptr[i];
450 x[i]=(1/gs->factors->val[iptr_main])*S1;
451 }
452 }
453 } else if (n_block==2) {
454 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
455 for (i = 0; i < n; ++i) {
456 if (gs->colorOf[i]==color) {
457 /* x_i=x_i-a_ik*x_k */
458 S1=b[2*i];
459 S2=b[2*i+1];
460 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
461 k=gs->pattern->index[iptr_ik];
462 if (gs->colorOf[k]<color) {
463 R1=x[2*k];
464 R2=x[2*k+1];
465 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
466 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
467 }
468 }
469 iptr_main=gs->main_iptr[i];
470 A11=gs->factors->val[iptr_main*4];
471 A21=gs->factors->val[iptr_main*4+1];
472 A12=gs->factors->val[iptr_main*4+2];
473 A22=gs->factors->val[iptr_main*4+3];
474 D = A11*A22-A12*A21;
475 if (ABS(D)>0.) {
476 D=1./D;
477 S11= A22*D;
478 S21=-A21*D;
479 S12=-A12*D;
480 S22= A11*D;
481 x[2*i ]=S11*S1+S12*S2;
482 x[2*i+1]=S21*S1+S22*S2;
483 } else {
484 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
485 }
486 }
487
488 }
489 } else if (n_block==3) {
490 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
491 for (i = 0; i < n; ++i) {
492 if (gs->colorOf[i]==color) {
493 /* x_i=x_i-a_ik*x_k */
494 S1=b[3*i];
495 S2=b[3*i+1];
496 S3=b[3*i+2];
497 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
498 k=gs->pattern->index[iptr_ik];
499 if (gs->colorOf[k]<color) {
500 R1=x[3*k];
501 R2=x[3*k+1];
502 R3=x[3*k+2];
503 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
504 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
505 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
506 }
507 }
508 iptr_main=gs->main_iptr[i];
509 A11=gs->factors->val[iptr_main*9 ];
510 A21=gs->factors->val[iptr_main*9+1];
511 A31=gs->factors->val[iptr_main*9+2];
512 A12=gs->factors->val[iptr_main*9+3];
513 A22=gs->factors->val[iptr_main*9+4];
514 A32=gs->factors->val[iptr_main*9+5];
515 A13=gs->factors->val[iptr_main*9+6];
516 A23=gs->factors->val[iptr_main*9+7];
517 A33=gs->factors->val[iptr_main*9+8];
518 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
519 if (ABS(D)>0.) {
520 D=1./D;
521 S11=(A22*A33-A23*A32)*D;
522 S21=(A31*A23-A21*A33)*D;
523 S31=(A21*A32-A31*A22)*D;
524 S12=(A13*A32-A12*A33)*D;
525 S22=(A11*A33-A31*A13)*D;
526 S32=(A12*A31-A11*A32)*D;
527 S13=(A12*A23-A13*A22)*D;
528 S23=(A13*A21-A11*A23)*D;
529 S33=(A11*A22-A12*A21)*D;
530 /* a_ik=a_ii^{-1}*a_ik */
531 x[3*i ]=S11*S1+S12*S2+S13*S3;
532 x[3*i+1]=S21*S1+S22*S2+S23*S3;
533 x[3*i+2]=S31*S1+S32*S2+S33*S3;
534 } else {
535 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
536 }
537 }
538 }
539 }
540 }
541
542 /* Multipling with diag(A) */
543 Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
544
545 /* backward substitution */
546 for (color=(gs->num_colors)-1;color>-1;--color) {
547 if (n_block==1) {
548 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
549 for (i = 0; i < n; ++i) {
550 if (gs->colorOf[i]==color) {
551 /* x_i=x_i-a_ik*x_k */
552 S1=x[i];
553 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
554 k=gs->pattern->index[iptr_ik];
555 if (gs->colorOf[k]>color) {
556 R1=x[k];
557 S1-=gs->factors->val[iptr_ik]*R1;
558 }
559 }
560 /*x[i]=S1;*/
561 iptr_main=gs->main_iptr[i];
562 x[i]=(1/gs->factors->val[iptr_main])*S1;
563 }
564 }
565 } else if (n_block==2) {
566 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
567 for (i = 0; i < n; ++i) {
568 if (gs->colorOf[i]==color) {
569 /* x_i=x_i-a_ik*x_k */
570 S1=x[2*i];
571 S2=x[2*i+1];
572 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
573 k=gs->pattern->index[iptr_ik];
574 if (gs->colorOf[k]>color) {
575 R1=x[2*k];
576 R2=x[2*k+1];
577 S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
578 S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
579 }
580 }
581 /*x[2*i]=S1;
582 x[2*i+1]=S2;*/
583 iptr_main=gs->main_iptr[i];
584 A11=gs->factors->val[iptr_main*4];
585 A21=gs->factors->val[iptr_main*4+1];
586 A12=gs->factors->val[iptr_main*4+2];
587 A22=gs->factors->val[iptr_main*4+3];
588 D = A11*A22-A12*A21;
589 if (ABS(D)>0.) {
590 D=1./D;
591 S11= A22*D;
592 S21=-A21*D;
593 S12=-A12*D;
594 S22= A11*D;
595 x[2*i ]=S11*S1+S12*S2;
596 x[2*i+1]=S21*S1+S22*S2;
597 } else {
598 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
599 }
600
601 }
602 }
603 } else if (n_block==3) {
604 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
605 for (i = 0; i < n; ++i) {
606 if (gs->colorOf[i]==color) {
607 /* x_i=x_i-a_ik*x_k */
608 S1=x[3*i ];
609 S2=x[3*i+1];
610 S3=x[3*i+2];
611 for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
612 k=gs->pattern->index[iptr_ik];
613 if (gs->colorOf[k]>color) {
614 R1=x[3*k];
615 R2=x[3*k+1];
616 R3=x[3*k+2];
617 S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
618 S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
619 S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
620 }
621 }
622 /* x[3*i]=S1;
623 x[3*i+1]=S2;
624 x[3*i+2]=S3;
625 */ iptr_main=gs->main_iptr[i];
626 A11=gs->factors->val[iptr_main*9 ];
627 A21=gs->factors->val[iptr_main*9+1];
628 A31=gs->factors->val[iptr_main*9+2];
629 A12=gs->factors->val[iptr_main*9+3];
630 A22=gs->factors->val[iptr_main*9+4];
631 A32=gs->factors->val[iptr_main*9+5];
632 A13=gs->factors->val[iptr_main*9+6];
633 A23=gs->factors->val[iptr_main*9+7];
634 A33=gs->factors->val[iptr_main*9+8];
635 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
636 if (ABS(D)>0.) {
637 D=1./D;
638 S11=(A22*A33-A23*A32)*D;
639 S21=(A31*A23-A21*A33)*D;
640 S31=(A21*A32-A31*A22)*D;
641 S12=(A13*A32-A12*A33)*D;
642 S22=(A11*A33-A31*A13)*D;
643 S32=(A12*A31-A11*A32)*D;
644 S13=(A12*A23-A13*A22)*D;
645 S23=(A13*A21-A11*A23)*D;
646 S33=(A11*A22-A12*A21)*D;
647 x[3*i ]=S11*S1+S12*S2+S13*S3;
648 x[3*i+1]=S21*S1+S22*S2+S23*S3;
649 x[3*i+2]=S31*S1+S32*S2+S33*S3;
650 } else {
651 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
652 }
653 }
654 }
655 }
656 }
657
658 return;
659 }
660

  ViewVC Help
Powered by ViewVC 1.1.26