/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Annotation of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1902 - (hide annotations)
Wed Oct 22 03:54:14 2008 UTC (10 years, 3 months ago) by artak
Original Path: trunk/paso/src/Solver_GS.c
File MIME type: text/plain
File size: 30290 byte(s)
some bugs fixed in coresening methods
1 jfenwick 1851
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005,2006,2007,2008 */
22     /* Author: artak@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "Solver.h"
28     #include "PasoUtil.h"
29    
30     #include <stdio.h>
31    
32     /**************************************************************/
33    
34     /* free all memory used by GS */
35    
36     void Paso_Solver_GS_free(Paso_Solver_GS * in) {
37     if (in!=NULL) {
38     MEMFREE(in->colorOf);
39     Paso_SparseMatrix_free(in->factors);
40     MEMFREE(in->diag);
41     MEMFREE(in->main_iptr);
42     Paso_Pattern_free(in->pattern);
43     MEMFREE(in);
44     }
45     }
46    
47     /**************************************************************/
48    
49     /* constructs the incomplete block factorization of
50    
51     */
52     Paso_Solver_GS* Paso_Solver_getGS(Paso_SparseMatrix * A,bool_t verbose) {
53     dim_t n=A->numRows;
54     dim_t n_block=A->row_block_size;
55     dim_t block_size=A->block_size;
56     index_t num_colors=0, *mis_marker=NULL;
57     register index_t i,iptr_main,iPtr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
58     double time0,time_color,time_fac;
59     /* allocations: */
60     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
61     if (Paso_checkPtr(out)) return NULL;
62     out->colorOf=MEMALLOC(n,index_t);
63     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
64     /*out->diag=MEMALLOC(A->len,double);*/
65     out->main_iptr=MEMALLOC(n,index_t);
66     out->pattern=Paso_Pattern_getReference(A->pattern);
67     out->factors=Paso_SparseMatrix_getReference(A);
68     out->n_block=n_block;
69     out->n=n;
70    
71     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72     time0=Paso_timer();
73     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
74     time_color=Paso_timer()-time0;
75    
76     if (Paso_noError()) {
77     time0=Paso_timer();
78    
79     if (! (Paso_checkPtr(out->diag))) {
80     if (n_block==1) {
81     #pragma omp parallel for private(i, iPtr) schedule(static)
82     for (i = 0; i < A->pattern->numOutput; i++) {
83     out->diag[i]=1.;
84     /* find main diagonal */
85     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
86     if (A->pattern->index[iPtr]==i) {
87     iptr_main=iPtr;
88     out->diag[i]=A->val[iPtr];
89     break;
90     }
91     }
92     out->main_iptr[i]=iptr_main;
93     }
94     } else if (n_block==2) {
95     #pragma omp parallel for private(i, iPtr) schedule(static)
96     for (i = 0; i < A->pattern->numOutput; i++) {
97     out->diag[i*4+0]= 1.;
98     out->diag[i*4+1]= 0.;
99     out->diag[i*4+2]= 0.;
100     out->diag[i*4+3]= 1.;
101     /* find main diagonal */
102     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
103     if (A->pattern->index[iPtr]==i) {
104     iptr_main=iPtr;
105     out->diag[i*4]= A->val[iPtr*4];
106     out->diag[i*4+1]=A->val[iPtr*4+1];
107     out->diag[i*4+2]=A->val[iPtr*4+2];
108     out->diag[i*4+3]= A->val[iPtr*4+3];
109     break;
110     }
111     }
112     out->main_iptr[i]=iptr_main;
113     }
114     } else if (n_block==3) {
115     #pragma omp parallel for private(i, iPtr) schedule(static)
116     for (i = 0; i < A->pattern->numOutput; i++) {
117     out->diag[i*9 ]=1.;
118     out->diag[i*9+1]=0.;
119     out->diag[i*9+2]=0.;
120     out->diag[i*9+3]=0.;
121     out->diag[i*9+4]=1.;
122     out->diag[i*9+5]=0.;
123     out->diag[i*9+6]=0.;
124     out->diag[i*9+7]=0.;
125     out->diag[i*9+8]=1.;
126     /* find main diagonal */
127     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
128     if (A->pattern->index[iPtr]==i) {
129     iptr_main=iPtr;
130     out->diag[i*9 ]=A->val[iPtr*9 ];
131     out->diag[i*9+1]=A->val[iPtr*9+1];
132     out->diag[i*9+2]=A->val[iPtr*9+2];
133     out->diag[i*9+3]=A->val[iPtr*9+3];
134     out->diag[i*9+4]=A->val[iPtr*9+4];
135     out->diag[i*9+5]=A->val[iPtr*9+5];
136     out->diag[i*9+6]=A->val[iPtr*9+6];
137     out->diag[i*9+7]=A->val[iPtr*9+7];
138     out->diag[i*9+8]=A->val[iPtr*9+8];
139     break;
140     }
141     }
142     out->main_iptr[i]=iptr_main;
143     }
144     }
145     }
146    
147     time_fac=Paso_timer()-time0;
148     }
149     }
150     if (Paso_noError()) {
151     if (verbose) {
152     printf("GS: %d color used \n",out->num_colors);
153     printf("timing: GS: coloring/elemination : %e/%e\n",time_color,time_fac);
154     }
155     return out;
156     } else {
157     Paso_Solver_GS_free(out);
158     return NULL;
159     }
160     }
161    
162     /**************************************************************/
163    
164     /* apply GS precondition b-> x
165    
166     in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
167    
168     should be called within a parallel region
169     barrier synconization should be performed to make sure that the input vector available
170    
171     */
172    
173     void Paso_Solver_solveGS(Paso_Solver_GS * gs, double * x, double * b) {
174     register dim_t i,k;
175     register index_t color,iptr_ik,iptr_main;
176     register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
177     dim_t n_block=gs->n_block;
178     dim_t n=gs->n;
179     index_t* pivot=NULL;
180    
181     /* copy x into b*/
182     #pragma omp parallel for private(i) schedule(static)
183     for (i=0;i<n*n_block;++i) x[i]=b[i];
184     /* forward substitution */
185     for (color=0;color<gs->num_colors;++color) {
186     if (n_block==1) {
187     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
188     for (i = 0; i < n; ++i) {
189     if (gs->colorOf[i]==color) {
190     /* x_i=x_i-a_ik*x_k */
191     S1=x[i];
192     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
193     k=gs->pattern->index[iptr_ik];
194     if (gs->colorOf[k]<color) {
195     R1=x[k];
196     S1-=gs->factors->val[iptr_ik]*R1;
197     }
198     }
199     iptr_main=gs->main_iptr[i];
200     x[i]=(1/gs->factors->val[iptr_main])*S1;
201     }
202     }
203     } else if (n_block==2) {
204     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
205     for (i = 0; i < n; ++i) {
206     if (gs->colorOf[i]==color) {
207     /* x_i=x_i-a_ik*x_k */
208     S1=x[2*i];
209     S2=x[2*i+1];
210     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
211     k=gs->pattern->index[iptr_ik];
212     if (gs->colorOf[k]<color) {
213     R1=x[2*k];
214     R2=x[2*k+1];
215     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
216     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
217     }
218     }
219     iptr_main=gs->main_iptr[i];
220     A11=gs->factors->val[iptr_main*4];
221     A21=gs->factors->val[iptr_main*4+1];
222     A12=gs->factors->val[iptr_main*4+2];
223     A22=gs->factors->val[iptr_main*4+3];
224     D = A11*A22-A12*A21;
225     if (ABS(D)>0.) {
226     D=1./D;
227     S11= A22*D;
228     S21=-A21*D;
229     S12=-A12*D;
230     S22= A11*D;
231     x[2*i ]=S11*S1+S12*S2;
232     x[2*i+1]=S21*S1+S22*S2;
233     } else {
234     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
235     }
236     }
237    
238     }
239     } else if (n_block==3) {
240     #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
241     for (i = 0; i < n; ++i) {
242     if (gs->colorOf[i]==color) {
243     /* x_i=x_i-a_ik*x_k */
244     S1=x[3*i];
245     S2=x[3*i+1];
246     S3=x[3*i+2];
247     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
248     k=gs->pattern->index[iptr_ik];
249     if (gs->colorOf[k]<color) {
250     R1=x[3*k];
251     R2=x[3*k+1];
252     R3=x[3*k+2];
253     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
254     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
255     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
256     }
257     }
258     iptr_main=gs->main_iptr[i];
259     A11=gs->factors->val[iptr_main*9 ];
260     A21=gs->factors->val[iptr_main*9+1];
261     A31=gs->factors->val[iptr_main*9+2];
262     A12=gs->factors->val[iptr_main*9+3];
263     A22=gs->factors->val[iptr_main*9+4];
264     A32=gs->factors->val[iptr_main*9+5];
265     A13=gs->factors->val[iptr_main*9+6];
266     A23=gs->factors->val[iptr_main*9+7];
267     A33=gs->factors->val[iptr_main*9+8];
268     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
269     if (ABS(D)>0.) {
270     D=1./D;
271     S11=(A22*A33-A23*A32)*D;
272     S21=(A31*A23-A21*A33)*D;
273     S31=(A21*A32-A31*A22)*D;
274     S12=(A13*A32-A12*A33)*D;
275     S22=(A11*A33-A31*A13)*D;
276     S32=(A12*A31-A11*A32)*D;
277     S13=(A12*A23-A13*A22)*D;
278     S23=(A13*A21-A11*A23)*D;
279     S33=(A11*A22-A12*A21)*D;
280     /* a_ik=a_ii^{-1}*a_ik */
281     x[3*i ]=S11*S1+S12*S2+S13*S3;
282     x[3*i+1]=S21*S1+S22*S2+S23*S3;
283     x[3*i+2]=S31*S1+S32*S2+S33*S3;
284     } else {
285     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
286     }
287     }
288     }
289     }
290     }
291    
292     /* Multipling with diag(A) */
293     Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
294    
295     /* backward substitution */
296     for (color=(gs->num_colors)-1;color>-1;--color) {
297     if (n_block==1) {
298     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
299     for (i = 0; i < n; ++i) {
300     if (gs->colorOf[i]==color) {
301     /* x_i=x_i-a_ik*x_k */
302     S1=x[i];
303     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
304     k=gs->pattern->index[iptr_ik];
305     if (gs->colorOf[k]>color) {
306     R1=x[k];
307     S1-=gs->factors->val[iptr_ik]*R1;
308     }
309     }
310     /*x[i]=S1;*/
311     iptr_main=gs->main_iptr[i];
312     x[i]=(1/gs->factors->val[iptr_main])*S1;
313     }
314     }
315     } else if (n_block==2) {
316     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
317     for (i = 0; i < n; ++i) {
318     if (gs->colorOf[i]==color) {
319     /* x_i=x_i-a_ik*x_k */
320     S1=x[2*i];
321     S2=x[2*i+1];
322     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
323     k=gs->pattern->index[iptr_ik];
324     if (gs->colorOf[k]>color) {
325     R1=x[2*k];
326     R2=x[2*k+1];
327     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
328     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
329     }
330     }
331     /*x[2*i]=S1;
332     x[2*i+1]=S2;*/
333     iptr_main=gs->main_iptr[i];
334     A11=gs->factors->val[iptr_main*4];
335     A21=gs->factors->val[iptr_main*4+1];
336     A12=gs->factors->val[iptr_main*4+2];
337     A22=gs->factors->val[iptr_main*4+3];
338     D = A11*A22-A12*A21;
339     if (ABS(D)>0.) {
340     D=1./D;
341     S11= A22*D;
342     S21=-A21*D;
343     S12=-A12*D;
344     S22= A11*D;
345     x[2*i ]=S11*S1+S12*S2;
346     x[2*i+1]=S21*S1+S22*S2;
347     } else {
348     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
349     }
350    
351     }
352     }
353     } else if (n_block==3) {
354     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
355     for (i = 0; i < n; ++i) {
356     if (gs->colorOf[i]==color) {
357     /* x_i=x_i-a_ik*x_k */
358     S1=x[3*i ];
359     S2=x[3*i+1];
360     S3=x[3*i+2];
361     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
362     k=gs->pattern->index[iptr_ik];
363     if (gs->colorOf[k]>color) {
364     R1=x[3*k];
365     R2=x[3*k+1];
366     R3=x[3*k+2];
367     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
368     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
369     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
370     }
371     }
372     /* x[3*i]=S1;
373     x[3*i+1]=S2;
374     x[3*i+2]=S3;
375     */ iptr_main=gs->main_iptr[i];
376     A11=gs->factors->val[iptr_main*9 ];
377     A21=gs->factors->val[iptr_main*9+1];
378     A31=gs->factors->val[iptr_main*9+2];
379     A12=gs->factors->val[iptr_main*9+3];
380     A22=gs->factors->val[iptr_main*9+4];
381     A32=gs->factors->val[iptr_main*9+5];
382     A13=gs->factors->val[iptr_main*9+6];
383     A23=gs->factors->val[iptr_main*9+7];
384     A33=gs->factors->val[iptr_main*9+8];
385     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
386     if (ABS(D)>0.) {
387     D=1./D;
388     S11=(A22*A33-A23*A32)*D;
389     S21=(A31*A23-A21*A33)*D;
390     S31=(A21*A32-A31*A22)*D;
391     S12=(A13*A32-A12*A33)*D;
392     S22=(A11*A33-A31*A13)*D;
393     S32=(A12*A31-A11*A32)*D;
394     S13=(A12*A23-A13*A22)*D;
395     S23=(A13*A21-A11*A23)*D;
396     S33=(A11*A22-A12*A21)*D;
397     x[3*i ]=S11*S1+S12*S2+S13*S3;
398     x[3*i+1]=S21*S1+S22*S2+S23*S3;
399     x[3*i+2]=S31*S1+S32*S2+S33*S3;
400     } else {
401     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
402     }
403     }
404     }
405     }
406     }
407    
408     return;
409     }
410    
411 artak 1902 /**************************************************************/
412    
413     /* apply GS precondition b-> x
414    
415     in fact it solves LD^{-1}Ux=b in the form x= U^{-1} D L^{-1}b
416    
417     should be called within a parallel region
418     barrier synconization should be performed to make sure that the input vector available
419    
420     */
421    
422     void Paso_Solver_solveGS1(Paso_Solver_GS * gs, double * x, double * b) {
423     register dim_t i,k;
424     register index_t color,iptr_ik,iptr_main;
425     register double A11,A12,A21,A22,A13,A23,A33,A32,A31,S1,S2,S3,R1,R2,R3,D,S21,S22,S12,S11,S13,S23,S33,S32,S31;
426     dim_t n_block=gs->n_block;
427     dim_t n=gs->n;
428     index_t* pivot=NULL;
429    
430     /* copy x into b*/
431     #pragma omp parallel for private(i) schedule(static)
432     /*for (i=0;i<n*n_block;++i) x[i]=b[i];*/
433    
434     /* forward substitution */
435     for (color=0;color<gs->num_colors;++color) {
436     if (n_block==1) {
437     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
438     for (i = 0; i < n; ++i) {
439     if (gs->colorOf[i]==color) {
440     /* x_i=x_i-a_ik*x_k */
441     S1=b[i];
442     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
443     k=gs->pattern->index[iptr_ik];
444     if (gs->colorOf[k]<color) {
445     R1=x[k];
446     S1-=gs->factors->val[iptr_ik]*R1;
447     }
448     }
449     iptr_main=gs->main_iptr[i];
450     x[i]=(1/gs->factors->val[iptr_main])*S1;
451     }
452     }
453     } else if (n_block==2) {
454     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
455     for (i = 0; i < n; ++i) {
456     if (gs->colorOf[i]==color) {
457     /* x_i=x_i-a_ik*x_k */
458     S1=b[2*i];
459     S2=b[2*i+1];
460     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
461     k=gs->pattern->index[iptr_ik];
462     if (gs->colorOf[k]<color) {
463     R1=x[2*k];
464     R2=x[2*k+1];
465     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
466     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
467     }
468     }
469     iptr_main=gs->main_iptr[i];
470     A11=gs->factors->val[iptr_main*4];
471     A21=gs->factors->val[iptr_main*4+1];
472     A12=gs->factors->val[iptr_main*4+2];
473     A22=gs->factors->val[iptr_main*4+3];
474     D = A11*A22-A12*A21;
475     if (ABS(D)>0.) {
476     D=1./D;
477     S11= A22*D;
478     S21=-A21*D;
479     S12=-A12*D;
480     S22= A11*D;
481     x[2*i ]=S11*S1+S12*S2;
482     x[2*i+1]=S21*S1+S22*S2;
483     } else {
484     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
485     }
486     }
487    
488     }
489     } else if (n_block==3) {
490     #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
491     for (i = 0; i < n; ++i) {
492     if (gs->colorOf[i]==color) {
493     /* x_i=x_i-a_ik*x_k */
494     S1=b[3*i];
495     S2=b[3*i+1];
496     S3=b[3*i+2];
497     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
498     k=gs->pattern->index[iptr_ik];
499     if (gs->colorOf[k]<color) {
500     R1=x[3*k];
501     R2=x[3*k+1];
502     R3=x[3*k+2];
503     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
504     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
505     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
506     }
507     }
508     iptr_main=gs->main_iptr[i];
509     A11=gs->factors->val[iptr_main*9 ];
510     A21=gs->factors->val[iptr_main*9+1];
511     A31=gs->factors->val[iptr_main*9+2];
512     A12=gs->factors->val[iptr_main*9+3];
513     A22=gs->factors->val[iptr_main*9+4];
514     A32=gs->factors->val[iptr_main*9+5];
515     A13=gs->factors->val[iptr_main*9+6];
516     A23=gs->factors->val[iptr_main*9+7];
517     A33=gs->factors->val[iptr_main*9+8];
518     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
519     if (ABS(D)>0.) {
520     D=1./D;
521     S11=(A22*A33-A23*A32)*D;
522     S21=(A31*A23-A21*A33)*D;
523     S31=(A21*A32-A31*A22)*D;
524     S12=(A13*A32-A12*A33)*D;
525     S22=(A11*A33-A31*A13)*D;
526     S32=(A12*A31-A11*A32)*D;
527     S13=(A12*A23-A13*A22)*D;
528     S23=(A13*A21-A11*A23)*D;
529     S33=(A11*A22-A12*A21)*D;
530     /* a_ik=a_ii^{-1}*a_ik */
531     x[3*i ]=S11*S1+S12*S2+S13*S3;
532     x[3*i+1]=S21*S1+S22*S2+S23*S3;
533     x[3*i+2]=S31*S1+S32*S2+S33*S3;
534     } else {
535     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
536     }
537     }
538     }
539     }
540     }
541    
542     /* Multipling with diag(A) */
543     Paso_Solver_applyBlockDiagonalMatrix(gs->n_block,gs->n,gs->diag,pivot,x,x);
544    
545     /* backward substitution */
546     for (color=(gs->num_colors)-1;color>-1;--color) {
547     if (n_block==1) {
548     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
549     for (i = 0; i < n; ++i) {
550     if (gs->colorOf[i]==color) {
551     /* x_i=x_i-a_ik*x_k */
552     S1=x[i];
553     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
554     k=gs->pattern->index[iptr_ik];
555     if (gs->colorOf[k]>color) {
556     R1=x[k];
557     S1-=gs->factors->val[iptr_ik]*R1;
558     }
559     }
560     /*x[i]=S1;*/
561     iptr_main=gs->main_iptr[i];
562     x[i]=(1/gs->factors->val[iptr_main])*S1;
563     }
564     }
565     } else if (n_block==2) {
566     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
567     for (i = 0; i < n; ++i) {
568     if (gs->colorOf[i]==color) {
569     /* x_i=x_i-a_ik*x_k */
570     S1=x[2*i];
571     S2=x[2*i+1];
572     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
573     k=gs->pattern->index[iptr_ik];
574     if (gs->colorOf[k]>color) {
575     R1=x[2*k];
576     R2=x[2*k+1];
577     S1-=gs->factors->val[4*iptr_ik ]*R1+gs->factors->val[4*iptr_ik+2]*R2;
578     S2-=gs->factors->val[4*iptr_ik+1]*R1+gs->factors->val[4*iptr_ik+3]*R2;
579     }
580     }
581     /*x[2*i]=S1;
582     x[2*i+1]=S2;*/
583     iptr_main=gs->main_iptr[i];
584     A11=gs->factors->val[iptr_main*4];
585     A21=gs->factors->val[iptr_main*4+1];
586     A12=gs->factors->val[iptr_main*4+2];
587     A22=gs->factors->val[iptr_main*4+3];
588     D = A11*A22-A12*A21;
589     if (ABS(D)>0.) {
590     D=1./D;
591     S11= A22*D;
592     S21=-A21*D;
593     S12=-A12*D;
594     S22= A11*D;
595     x[2*i ]=S11*S1+S12*S2;
596     x[2*i+1]=S21*S1+S22*S2;
597     } else {
598     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
599     }
600    
601     }
602     }
603     } else if (n_block==3) {
604     #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
605     for (i = 0; i < n; ++i) {
606     if (gs->colorOf[i]==color) {
607     /* x_i=x_i-a_ik*x_k */
608     S1=x[3*i ];
609     S2=x[3*i+1];
610     S3=x[3*i+2];
611     for (iptr_ik=gs->pattern->ptr[i];iptr_ik<gs->pattern->ptr[i+1]; ++iptr_ik) {
612     k=gs->pattern->index[iptr_ik];
613     if (gs->colorOf[k]>color) {
614     R1=x[3*k];
615     R2=x[3*k+1];
616     R3=x[3*k+2];
617     S1-=gs->factors->val[9*iptr_ik ]*R1+gs->factors->val[9*iptr_ik+3]*R2+gs->factors->val[9*iptr_ik+6]*R3;
618     S2-=gs->factors->val[9*iptr_ik+1]*R1+gs->factors->val[9*iptr_ik+4]*R2+gs->factors->val[9*iptr_ik+7]*R3;
619     S3-=gs->factors->val[9*iptr_ik+2]*R1+gs->factors->val[9*iptr_ik+5]*R2+gs->factors->val[9*iptr_ik+8]*R3;
620     }
621     }
622     /* x[3*i]=S1;
623     x[3*i+1]=S2;
624     x[3*i+2]=S3;
625     */ iptr_main=gs->main_iptr[i];
626     A11=gs->factors->val[iptr_main*9 ];
627     A21=gs->factors->val[iptr_main*9+1];
628     A31=gs->factors->val[iptr_main*9+2];
629     A12=gs->factors->val[iptr_main*9+3];
630     A22=gs->factors->val[iptr_main*9+4];
631     A32=gs->factors->val[iptr_main*9+5];
632     A13=gs->factors->val[iptr_main*9+6];
633     A23=gs->factors->val[iptr_main*9+7];
634     A33=gs->factors->val[iptr_main*9+8];
635     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
636     if (ABS(D)>0.) {
637     D=1./D;
638     S11=(A22*A33-A23*A32)*D;
639     S21=(A31*A23-A21*A33)*D;
640     S31=(A21*A32-A31*A22)*D;
641     S12=(A13*A32-A12*A33)*D;
642     S22=(A11*A33-A31*A13)*D;
643     S32=(A12*A31-A11*A32)*D;
644     S13=(A12*A23-A13*A22)*D;
645     S23=(A13*A21-A11*A23)*D;
646     S33=(A11*A22-A12*A21)*D;
647     x[3*i ]=S11*S1+S12*S2+S13*S3;
648     x[3*i+1]=S21*S1+S22*S2+S23*S3;
649     x[3*i+2]=S31*S1+S32*S2+S33*S3;
650     } else {
651     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGS: non-regular main diagonal block.");
652     }
653     }
654     }
655     }
656     }
657    
658     return;
659     }
660    

  ViewVC Help
Powered by ViewVC 1.1.26