/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 483 - (hide annotations)
Thu Feb 2 02:10:15 2006 UTC (13 years, 9 months ago) by jgs
Original Path: trunk/paso/src/Solvers/Solver_ILU.c
File MIME type: text/plain
File size: 25280 byte(s)
change includes to use PasoUtil.h, and add remainder
of includes to SConscript include install

1 jgs 150 /* $Id$ */
2    
3     /**************************************************************/
4    
5     /* Paso: ILU preconditioner with reordering */
6    
7     /**************************************************************/
8    
9     /* Copyrights by ACcESS Australia 2003,2004,2005 */
10     /* Author: gross@access.edu.au */
11    
12     /**************************************************************/
13    
14     #include "Paso.h"
15     #include "Solver.h"
16 jgs 483 #include "PasoUtil.h"
17 jgs 150
18     /**************************************************************/
19    
20     /* free all memory used by ILU */
21    
22     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
23     if (in!=NULL) {
24 gross 431 MEMFREE(in->colorOf);
25     MEMFREE(in->factors);
26     MEMFREE(in->main_iptr);
27     Paso_SystemMatrixPattern_dealloc(in->pattern);
28 jgs 150 MEMFREE(in);
29     }
30     }
31    
32     /**************************************************************/
33    
34 gross 431 /* constructs the incomplete block factorization of
35 jgs 150
36     */
37 gross 431 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A,bool_t verbose) {
38     dim_t n=A->num_rows;
39     dim_t n_block=A->row_block_size;
40     index_t num_colors=0;
41     register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
42     register double mainA11,mainA12,mainA13,mainA21,mainA22,mainA23,mainA31,mainA32,mainA33;
43     register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
44     register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
45     double time0,time_color,time_fac;
46     /* allocations: */
47     Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
48     if (Paso_checkPtr(out)) return NULL;
49     index_t* mis_marker=TMPMEMALLOC(n,index_t);
50     out->colorOf=MEMALLOC(n,index_t);
51     out->factors=MEMALLOC(A->len,double);
52 gross 432 out->main_iptr=MEMALLOC(n,index_t);
53 gross 431 out->pattern=Paso_SystemMatrixPattern_reference(A->pattern);
54     out->n_block=n_block;
55     out->n=n;
56     time0=Paso_timer();
57     if ( !(Paso_checkPtr(mis_marker) ||
58     Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
59     /* get coloring */
60     index_t num_colors=0;
61     #pragma omp parallel for private(i) schedule(static)
62     for (i = 0; i < n; ++i) out->colorOf[i]=-1;
63     while (Paso_Util_isAny(n,out->colorOf,-1) && Paso_noError()) {
64     #pragma omp parallel for private(i) schedule(static)
65     for (i = 0; i < n; ++i) mis_marker[i]=out->colorOf[i];
66     Paso_SystemMatrixPattern_mis(A->pattern,mis_marker);
67 jgs 150
68 gross 431 #pragma omp parallel for private(i) schedule(static)
69     for (i = 0; i < n; ++i) if (mis_marker[i]) out->colorOf[i]=num_colors;
70     ++num_colors;
71     }
72     out->num_colors=num_colors;
73     time_color=Paso_timer()-time0;
74     time0=Paso_timer();
75     /* find main diagonal and copy matrix values */
76 gross 432 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
77 gross 431 for (i = 0; i < n; ++i) {
78     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
79     iptr_main=A->pattern->ptr[0]-1;
80     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
81     if (A->pattern->index[iptr]==i) iptr_main=iptr;
82     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
83     }
84     out->main_iptr[i]=iptr_main;
85     if (iptr_main==A->pattern->ptr[0]-1)
86     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
87     }
88     }
89     /* start factorization */
90 jgs 150
91 gross 432 #pragma omp barrier
92 gross 431 for (color=0;color<out->num_colors && Paso_noError();++color) {
93     if (n_block==1) {
94 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
95 jgs 150 for (i = 0; i < n; ++i) {
96 gross 431 if (out->colorOf[i]==color) {
97     for (color2=0;color2<color;++color2) {
98     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
99     k=A->pattern->index[iptr_ik];
100     if (out->colorOf[k]==color2) {
101     A11=out->factors[iptr_ik];
102     /* a_ij=a_ij-a_ik*a_kj */
103     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
104     j=A->pattern->index[iptr_kj];
105     if (out->colorOf[j]>color2) {
106     S11=out->factors[iptr_kj];
107     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
108     if (j==A->pattern->index[iptr_ij]) {
109     out->factors[iptr_ij]-=A11*S11;
110     break;
111     }
112     }
113     }
114     }
115     }
116     }
117     }
118     iptr_main=out->main_iptr[i];
119     D=out->factors[iptr_main];
120     if (ABS(D)>0.) {
121     D=1./D;
122     out->factors[iptr_main]=D;
123     /* a_ik=a_ii^{-1}*a_ik */
124     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
125     k=A->pattern->index[iptr_ik];
126     if (out->colorOf[k]>color) {
127     A11=out->factors[iptr_ik];
128     out->factors[iptr_ik]=A11*D;
129     }
130     }
131     } else {
132     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
133     }
134 jgs 150 }
135     }
136 gross 431 } else if (n_block==2) {
137 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
138 gross 431 for (i = 0; i < n; ++i) {
139     if (out->colorOf[i]==color) {
140     for (color2=0;color2<color;++color2) {
141     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
142     k=A->pattern->index[iptr_ik];
143     if (out->colorOf[k]==color2) {
144     A11=out->factors[iptr_ik*4 ];
145     A21=out->factors[iptr_ik*4+1];
146     A12=out->factors[iptr_ik*4+2];
147     A22=out->factors[iptr_ik*4+3];
148     /* a_ij=a_ij-a_ik*a_kj */
149     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
150     j=A->pattern->index[iptr_kj];
151     if (out->colorOf[j]>color2) {
152     S11=out->factors[iptr_kj*4];
153     S21=out->factors[iptr_kj*4+1];
154     S12=out->factors[iptr_kj*4+2];
155     S22=out->factors[iptr_kj*4+3];
156     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
157     if (j==A->pattern->index[iptr_ij]) {
158     out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
159     out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
160     out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
161     out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
162     break;
163     }
164     }
165     }
166     }
167     }
168 jgs 150 }
169 gross 431 }
170     iptr_main=out->main_iptr[i];
171     A11=out->factors[iptr_main*4];
172     A21=out->factors[iptr_main*4+1];
173     A12=out->factors[iptr_main*4+2];
174     A22=out->factors[iptr_main*4+3];
175     D = A11*A22-A12*A21;
176     if (ABS(D)>0.) {
177     D=1./D;
178     S11= A22*D;
179     S21=-A21*D;
180     S12=-A12*D;
181     S22= A11*D;
182     out->factors[iptr_main*4] = S11;
183     out->factors[iptr_main*4+1]= S21;
184     out->factors[iptr_main*4+2]= S12;
185     out->factors[iptr_main*4+3]= S22;
186     /* a_ik=a_ii^{-1}*a_ik */
187     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
188     k=A->pattern->index[iptr_ik];
189     if (out->colorOf[k]>color) {
190     A11=out->factors[iptr_ik*4 ];
191     A21=out->factors[iptr_ik*4+1];
192     A12=out->factors[iptr_ik*4+2];
193     A22=out->factors[iptr_ik*4+3];
194     out->factors[4*iptr_ik ]=S11*A11+S12*A21;
195     out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
196     out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
197     out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
198     }
199 jgs 150 }
200 gross 431 } else {
201     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
202     }
203     }
204 jgs 150 }
205 gross 431 } else if (n_block==3) {
206 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
207 gross 431 for (i = 0; i < n; ++i) {
208     if (out->colorOf[i]==color) {
209     for (color2=0;color2<color;++color2) {
210     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
211     k=A->pattern->index[iptr_ik];
212     if (out->colorOf[k]==color2) {
213     A11=out->factors[iptr_ik*9 ];
214     A21=out->factors[iptr_ik*9+1];
215     A31=out->factors[iptr_ik*9+2];
216     A12=out->factors[iptr_ik*9+3];
217     A22=out->factors[iptr_ik*9+4];
218     A32=out->factors[iptr_ik*9+5];
219     A13=out->factors[iptr_ik*9+6];
220     A23=out->factors[iptr_ik*9+7];
221     A33=out->factors[iptr_ik*9+8];
222     /* a_ij=a_ij-a_ik*a_kj */
223     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
224     j=A->pattern->index[iptr_kj];
225     if (out->colorOf[j]>color2) {
226     S11=out->factors[iptr_kj*9 ];
227     S21=out->factors[iptr_kj*9+1];
228     S31=out->factors[iptr_kj*9+2];
229     S12=out->factors[iptr_kj*9+3];
230     S22=out->factors[iptr_kj*9+4];
231     S32=out->factors[iptr_kj*9+5];
232     S13=out->factors[iptr_kj*9+6];
233     S23=out->factors[iptr_kj*9+7];
234     S33=out->factors[iptr_kj*9+8];
235     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
236     if (j==A->pattern->index[iptr_ij]) {
237     out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
238     out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
239     out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
240     out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
241     out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
242     out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
243     out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
244     out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
245     out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
246     break;
247     }
248     }
249     }
250 jgs 150 }
251     }
252 gross 431 }
253     }
254     iptr_main=out->main_iptr[i];
255     A11=out->factors[iptr_main*9 ];
256     A21=out->factors[iptr_main*9+1];
257     A31=out->factors[iptr_main*9+2];
258     A12=out->factors[iptr_main*9+3];
259     A22=out->factors[iptr_main*9+4];
260     A32=out->factors[iptr_main*9+5];
261     A13=out->factors[iptr_main*9+6];
262     A23=out->factors[iptr_main*9+7];
263     A33=out->factors[iptr_main*9+8];
264     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
265     if (ABS(D)>0.) {
266     D=1./D;
267     S11=(A22*A33-A23*A32)*D;
268     S21=(A31*A23-A21*A33)*D;
269     S31=(A21*A32-A31*A22)*D;
270     S12=(A13*A32-A12*A33)*D;
271     S22=(A11*A33-A31*A13)*D;
272     S32=(A12*A31-A11*A32)*D;
273     S13=(A12*A23-A13*A22)*D;
274     S23=(A13*A21-A11*A23)*D;
275     S33=(A11*A22-A12*A21)*D;
276    
277     out->factors[iptr_main*9 ]=S11;
278     out->factors[iptr_main*9+1]=S21;
279     out->factors[iptr_main*9+2]=S31;
280     out->factors[iptr_main*9+3]=S12;
281     out->factors[iptr_main*9+4]=S22;
282     out->factors[iptr_main*9+5]=S32;
283     out->factors[iptr_main*9+6]=S13;
284     out->factors[iptr_main*9+7]=S23;
285     out->factors[iptr_main*9+8]=S33;
286    
287     /* a_ik=a_ii^{-1}*a_ik */
288     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
289     k=A->pattern->index[iptr_ik];
290     if (out->colorOf[k]>color) {
291     A11=out->factors[iptr_ik*9 ];
292     A21=out->factors[iptr_ik*9+1];
293     A31=out->factors[iptr_ik*9+2];
294     A12=out->factors[iptr_ik*9+3];
295     A22=out->factors[iptr_ik*9+4];
296     A32=out->factors[iptr_ik*9+5];
297     A13=out->factors[iptr_ik*9+6];
298     A23=out->factors[iptr_ik*9+7];
299     A33=out->factors[iptr_ik*9+8];
300     out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
301     out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
302     out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
303     out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
304     out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
305     out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
306     out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
307     out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
308     out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
309     }
310     }
311     } else {
312     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
313     }
314 jgs 150 }
315     }
316 gross 431 } else {
317     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
318     }
319 gross 432 #pragma omp barrier
320 jgs 150 }
321 gross 431 time_fac=Paso_timer()-time0;
322 jgs 150 }
323 gross 431
324 jgs 150 TMPMEMFREE(mis_marker);
325     if (Paso_noError()) {
326     if (verbose) {
327 gross 431 printf("ILU: %d color used \n",out->num_colors);
328     printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
329 jgs 150 }
330     return out;
331     } else {
332     Paso_Solver_ILU_free(out);
333     return NULL;
334     }
335     }
336    
337     /**************************************************************/
338    
339     /* apply ILU precondition b-> x
340    
341 gross 431 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
342 jgs 150
343     should be called within a parallel region
344     barrier synconization should be performed to make sure that the input vector available
345    
346     */
347    
348     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
349 gross 431 register dim_t i,k;
350     register index_t color,iptr_ik,iptr_main;
351     register double S1,S2,S3,R1,R2,R3;
352 jgs 150 dim_t n_block=ilu->n_block;
353 gross 431 dim_t n=ilu->n;
354 jgs 150
355 gross 431
356     /* copy x into b*/
357     #pragma omp for private(i) schedule(static)
358     for (i=0;i<n*n_block;++i) x[i]=b[i];
359     /* forward substitution */
360     for (color=0;color<ilu->num_colors;++color) {
361     if (n_block==1) {
362 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
363 gross 431 for (i = 0; i < n; ++i) {
364     if (ilu->colorOf[i]==color) {
365     /* x_i=x_i-a_ik*x_k */
366     S1=x[i];
367     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
368     k=ilu->pattern->index[iptr_ik];
369     if (ilu->colorOf[k]<color) {
370     R1=x[k];
371     S1-=ilu->factors[iptr_ik]*R1;
372     }
373     }
374     iptr_main=ilu->main_iptr[i];
375     x[i]=ilu->factors[iptr_main]*S1;
376     }
377 jgs 150 }
378 gross 431 } else if (n_block==2) {
379 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
380 gross 431 for (i = 0; i < n; ++i) {
381     if (ilu->colorOf[i]==color) {
382     /* x_i=x_i-a_ik*x_k */
383     S1=x[2*i];
384     S2=x[2*i+1];
385     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
386     k=ilu->pattern->index[iptr_ik];
387     if (ilu->colorOf[k]<color) {
388     R1=x[2*k];
389     R2=x[2*k+1];
390     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
391     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
392     }
393     }
394     iptr_main=ilu->main_iptr[i];
395     x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
396     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
397     }
398    
399     }
400     } else if (n_block==3) {
401 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
402 gross 431 for (i = 0; i < n; ++i) {
403     if (ilu->colorOf[i]==color) {
404     /* x_i=x_i-a_ik*x_k */
405     S1=x[3*i];
406     S2=x[3*i+1];
407     S3=x[3*i+2];
408     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
409     k=ilu->pattern->index[iptr_ik];
410     if (ilu->colorOf[k]<color) {
411     R1=x[3*k];
412     R2=x[3*k+1];
413     R3=x[3*k+2];
414     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
415     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
416     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
417     }
418     }
419     iptr_main=ilu->main_iptr[i];
420     x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
421     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
422     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
423 jgs 150 }
424 gross 431 }
425 jgs 150 }
426 gross 431 #pragma omp barrier
427 jgs 150 }
428 gross 431 /* backward substitution */
429     for (color=(ilu->num_colors)-1;color>-1;--color) {
430     if (n_block==1) {
431 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1)
432 gross 431 for (i = 0; i < n; ++i) {
433     if (ilu->colorOf[i]==color) {
434     /* x_i=x_i-a_ik*x_k */
435     S1=x[i];
436     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
437     k=ilu->pattern->index[iptr_ik];
438     if (ilu->colorOf[k]>color) {
439     R1=x[k];
440     S1-=ilu->factors[iptr_ik]*R1;
441     }
442     }
443     x[i]=S1;
444     }
445     }
446     } else if (n_block==2) {
447 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
448 gross 431 for (i = 0; i < n; ++i) {
449     if (ilu->colorOf[i]==color) {
450     /* x_i=x_i-a_ik*x_k */
451     S1=x[2*i];
452     S2=x[2*i+1];
453     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
454     k=ilu->pattern->index[iptr_ik];
455     if (ilu->colorOf[k]>color) {
456     R1=x[2*k];
457     R2=x[2*k+1];
458     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
459     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
460     }
461     }
462     x[2*i]=S1;
463     x[2*i+1]=S2;
464     }
465     }
466     } else if (n_block==3) {
467 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
468 gross 431 for (i = 0; i < n; ++i) {
469     if (ilu->colorOf[i]==color) {
470     /* x_i=x_i-a_ik*x_k */
471     S1=x[3*i ];
472     S2=x[3*i+1];
473     S3=x[3*i+2];
474     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
475     k=ilu->pattern->index[iptr_ik];
476     if (ilu->colorOf[k]>color) {
477     R1=x[3*k];
478     R2=x[3*k+1];
479     R3=x[3*k+2];
480     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
481     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
482     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
483     }
484     }
485     x[3*i]=S1;
486     x[3*i+1]=S2;
487     x[3*i+2]=S3;
488     }
489     }
490     }
491     #pragma omp barrier
492     }
493 jgs 150 return;
494     }
495     /*
496     * $Log$
497     * Revision 1.2 2005/09/15 03:44:40 jgs
498     * Merge of development branch dev-02 back to main trunk on 2005-09-15
499     *
500     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
501     * These files have been extracted from finley to define a stand alone libray for iterative
502     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
503     * has not been tested yet.
504     *
505     *
506     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26