/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1028 - (hide annotations)
Wed Mar 14 00:15:24 2007 UTC (12 years, 6 months ago) by gross
File MIME type: text/plain
File size: 25935 byte(s)
modifications to be compliant with _WIN32. The substitutes for asinh, acosh, atanh are still missing (erf will through an exception)
1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: ILU preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26 gross 700 #include "Paso.h"
27 jgs 150 #include "Solver.h"
28 gross 700 #include "PasoUtil.h"
29 jgs 150
30     /**************************************************************/
31    
32     /* free all memory used by ILU */
33    
34     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
35     if (in!=NULL) {
36 gross 431 MEMFREE(in->colorOf);
37     MEMFREE(in->factors);
38     MEMFREE(in->main_iptr);
39     Paso_SystemMatrixPattern_dealloc(in->pattern);
40 jgs 150 MEMFREE(in);
41     }
42     }
43    
44     /**************************************************************/
45    
46 gross 431 /* constructs the incomplete block factorization of
47 jgs 150
48     */
49 gross 431 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A,bool_t verbose) {
50     dim_t n=A->num_rows;
51     dim_t n_block=A->row_block_size;
52 gross 1028 index_t num_colors=0, *mis_marker=NULL;
53 gross 431 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
54     register double mainA11,mainA12,mainA13,mainA21,mainA22,mainA23,mainA31,mainA32,mainA33;
55     register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
56     register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
57     double time0,time_color,time_fac;
58     /* allocations: */
59     Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
60     if (Paso_checkPtr(out)) return NULL;
61 gross 1028 mis_marker=TMPMEMALLOC(n,index_t);
62 gross 431 out->colorOf=MEMALLOC(n,index_t);
63     out->factors=MEMALLOC(A->len,double);
64 gross 432 out->main_iptr=MEMALLOC(n,index_t);
65 gross 431 out->pattern=Paso_SystemMatrixPattern_reference(A->pattern);
66     out->n_block=n_block;
67     out->n=n;
68     time0=Paso_timer();
69     if ( !(Paso_checkPtr(mis_marker) ||
70     Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71     /* get coloring */
72     index_t num_colors=0;
73     #pragma omp parallel for private(i) schedule(static)
74     for (i = 0; i < n; ++i) out->colorOf[i]=-1;
75     while (Paso_Util_isAny(n,out->colorOf,-1) && Paso_noError()) {
76     #pragma omp parallel for private(i) schedule(static)
77     for (i = 0; i < n; ++i) mis_marker[i]=out->colorOf[i];
78     Paso_SystemMatrixPattern_mis(A->pattern,mis_marker);
79 jgs 150
80 gross 431 #pragma omp parallel for private(i) schedule(static)
81     for (i = 0; i < n; ++i) if (mis_marker[i]) out->colorOf[i]=num_colors;
82     ++num_colors;
83     }
84     out->num_colors=num_colors;
85     time_color=Paso_timer()-time0;
86     time0=Paso_timer();
87     /* find main diagonal and copy matrix values */
88 gross 432 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
89 gross 431 for (i = 0; i < n; ++i) {
90     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
91     iptr_main=A->pattern->ptr[0]-1;
92     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
93     if (A->pattern->index[iptr]==i) iptr_main=iptr;
94     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
95     }
96     out->main_iptr[i]=iptr_main;
97     if (iptr_main==A->pattern->ptr[0]-1)
98     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
99     }
100     }
101     /* start factorization */
102 jgs 150
103 gross 432 #pragma omp barrier
104 gross 431 for (color=0;color<out->num_colors && Paso_noError();++color) {
105     if (n_block==1) {
106 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
107 jgs 150 for (i = 0; i < n; ++i) {
108 gross 431 if (out->colorOf[i]==color) {
109     for (color2=0;color2<color;++color2) {
110     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
111     k=A->pattern->index[iptr_ik];
112     if (out->colorOf[k]==color2) {
113     A11=out->factors[iptr_ik];
114     /* a_ij=a_ij-a_ik*a_kj */
115     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
116     j=A->pattern->index[iptr_kj];
117     if (out->colorOf[j]>color2) {
118     S11=out->factors[iptr_kj];
119     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
120     if (j==A->pattern->index[iptr_ij]) {
121     out->factors[iptr_ij]-=A11*S11;
122     break;
123     }
124     }
125     }
126     }
127     }
128     }
129     }
130     iptr_main=out->main_iptr[i];
131     D=out->factors[iptr_main];
132     if (ABS(D)>0.) {
133     D=1./D;
134     out->factors[iptr_main]=D;
135     /* a_ik=a_ii^{-1}*a_ik */
136     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
137     k=A->pattern->index[iptr_ik];
138     if (out->colorOf[k]>color) {
139     A11=out->factors[iptr_ik];
140     out->factors[iptr_ik]=A11*D;
141     }
142     }
143     } else {
144     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
145     }
146 jgs 150 }
147     }
148 gross 431 } else if (n_block==2) {
149 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
150 gross 431 for (i = 0; i < n; ++i) {
151     if (out->colorOf[i]==color) {
152     for (color2=0;color2<color;++color2) {
153     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
154     k=A->pattern->index[iptr_ik];
155     if (out->colorOf[k]==color2) {
156     A11=out->factors[iptr_ik*4 ];
157     A21=out->factors[iptr_ik*4+1];
158     A12=out->factors[iptr_ik*4+2];
159     A22=out->factors[iptr_ik*4+3];
160     /* a_ij=a_ij-a_ik*a_kj */
161     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
162     j=A->pattern->index[iptr_kj];
163     if (out->colorOf[j]>color2) {
164     S11=out->factors[iptr_kj*4];
165     S21=out->factors[iptr_kj*4+1];
166     S12=out->factors[iptr_kj*4+2];
167     S22=out->factors[iptr_kj*4+3];
168     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
169     if (j==A->pattern->index[iptr_ij]) {
170     out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
171     out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
172     out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
173     out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
174     break;
175     }
176     }
177     }
178     }
179     }
180 jgs 150 }
181 gross 431 }
182     iptr_main=out->main_iptr[i];
183     A11=out->factors[iptr_main*4];
184     A21=out->factors[iptr_main*4+1];
185     A12=out->factors[iptr_main*4+2];
186     A22=out->factors[iptr_main*4+3];
187     D = A11*A22-A12*A21;
188     if (ABS(D)>0.) {
189     D=1./D;
190     S11= A22*D;
191     S21=-A21*D;
192     S12=-A12*D;
193     S22= A11*D;
194     out->factors[iptr_main*4] = S11;
195     out->factors[iptr_main*4+1]= S21;
196     out->factors[iptr_main*4+2]= S12;
197     out->factors[iptr_main*4+3]= S22;
198     /* a_ik=a_ii^{-1}*a_ik */
199     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
200     k=A->pattern->index[iptr_ik];
201     if (out->colorOf[k]>color) {
202     A11=out->factors[iptr_ik*4 ];
203     A21=out->factors[iptr_ik*4+1];
204     A12=out->factors[iptr_ik*4+2];
205     A22=out->factors[iptr_ik*4+3];
206     out->factors[4*iptr_ik ]=S11*A11+S12*A21;
207     out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
208     out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
209     out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
210     }
211 jgs 150 }
212 gross 431 } else {
213     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
214     }
215     }
216 jgs 150 }
217 gross 431 } else if (n_block==3) {
218 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
219 gross 431 for (i = 0; i < n; ++i) {
220     if (out->colorOf[i]==color) {
221     for (color2=0;color2<color;++color2) {
222     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
223     k=A->pattern->index[iptr_ik];
224     if (out->colorOf[k]==color2) {
225     A11=out->factors[iptr_ik*9 ];
226     A21=out->factors[iptr_ik*9+1];
227     A31=out->factors[iptr_ik*9+2];
228     A12=out->factors[iptr_ik*9+3];
229     A22=out->factors[iptr_ik*9+4];
230     A32=out->factors[iptr_ik*9+5];
231     A13=out->factors[iptr_ik*9+6];
232     A23=out->factors[iptr_ik*9+7];
233     A33=out->factors[iptr_ik*9+8];
234     /* a_ij=a_ij-a_ik*a_kj */
235     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
236     j=A->pattern->index[iptr_kj];
237     if (out->colorOf[j]>color2) {
238     S11=out->factors[iptr_kj*9 ];
239     S21=out->factors[iptr_kj*9+1];
240     S31=out->factors[iptr_kj*9+2];
241     S12=out->factors[iptr_kj*9+3];
242     S22=out->factors[iptr_kj*9+4];
243     S32=out->factors[iptr_kj*9+5];
244     S13=out->factors[iptr_kj*9+6];
245     S23=out->factors[iptr_kj*9+7];
246     S33=out->factors[iptr_kj*9+8];
247     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
248     if (j==A->pattern->index[iptr_ij]) {
249     out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
250     out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
251     out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
252     out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
253     out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
254     out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
255     out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
256     out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
257     out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
258     break;
259     }
260     }
261     }
262 jgs 150 }
263     }
264 gross 431 }
265     }
266     iptr_main=out->main_iptr[i];
267     A11=out->factors[iptr_main*9 ];
268     A21=out->factors[iptr_main*9+1];
269     A31=out->factors[iptr_main*9+2];
270     A12=out->factors[iptr_main*9+3];
271     A22=out->factors[iptr_main*9+4];
272     A32=out->factors[iptr_main*9+5];
273     A13=out->factors[iptr_main*9+6];
274     A23=out->factors[iptr_main*9+7];
275     A33=out->factors[iptr_main*9+8];
276     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
277     if (ABS(D)>0.) {
278     D=1./D;
279     S11=(A22*A33-A23*A32)*D;
280     S21=(A31*A23-A21*A33)*D;
281     S31=(A21*A32-A31*A22)*D;
282     S12=(A13*A32-A12*A33)*D;
283     S22=(A11*A33-A31*A13)*D;
284     S32=(A12*A31-A11*A32)*D;
285     S13=(A12*A23-A13*A22)*D;
286     S23=(A13*A21-A11*A23)*D;
287     S33=(A11*A22-A12*A21)*D;
288    
289     out->factors[iptr_main*9 ]=S11;
290     out->factors[iptr_main*9+1]=S21;
291     out->factors[iptr_main*9+2]=S31;
292     out->factors[iptr_main*9+3]=S12;
293     out->factors[iptr_main*9+4]=S22;
294     out->factors[iptr_main*9+5]=S32;
295     out->factors[iptr_main*9+6]=S13;
296     out->factors[iptr_main*9+7]=S23;
297     out->factors[iptr_main*9+8]=S33;
298    
299     /* a_ik=a_ii^{-1}*a_ik */
300     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
301     k=A->pattern->index[iptr_ik];
302     if (out->colorOf[k]>color) {
303     A11=out->factors[iptr_ik*9 ];
304     A21=out->factors[iptr_ik*9+1];
305     A31=out->factors[iptr_ik*9+2];
306     A12=out->factors[iptr_ik*9+3];
307     A22=out->factors[iptr_ik*9+4];
308     A32=out->factors[iptr_ik*9+5];
309     A13=out->factors[iptr_ik*9+6];
310     A23=out->factors[iptr_ik*9+7];
311     A33=out->factors[iptr_ik*9+8];
312     out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
313     out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
314     out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
315     out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
316     out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
317     out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
318     out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
319     out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
320     out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
321     }
322     }
323     } else {
324     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
325     }
326 jgs 150 }
327     }
328 gross 431 } else {
329     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
330     }
331 gross 432 #pragma omp barrier
332 jgs 150 }
333 gross 431 time_fac=Paso_timer()-time0;
334 jgs 150 }
335 gross 431
336 jgs 150 TMPMEMFREE(mis_marker);
337     if (Paso_noError()) {
338     if (verbose) {
339 gross 431 printf("ILU: %d color used \n",out->num_colors);
340     printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
341 jgs 150 }
342     return out;
343     } else {
344     Paso_Solver_ILU_free(out);
345     return NULL;
346     }
347     }
348    
349     /**************************************************************/
350    
351     /* apply ILU precondition b-> x
352    
353 gross 431 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
354 jgs 150
355     should be called within a parallel region
356     barrier synconization should be performed to make sure that the input vector available
357    
358     */
359    
360     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
361 gross 431 register dim_t i,k;
362     register index_t color,iptr_ik,iptr_main;
363     register double S1,S2,S3,R1,R2,R3;
364 jgs 150 dim_t n_block=ilu->n_block;
365 gross 431 dim_t n=ilu->n;
366 jgs 150
367 gross 431
368     /* copy x into b*/
369     #pragma omp for private(i) schedule(static)
370     for (i=0;i<n*n_block;++i) x[i]=b[i];
371     /* forward substitution */
372     for (color=0;color<ilu->num_colors;++color) {
373     if (n_block==1) {
374 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
375 gross 431 for (i = 0; i < n; ++i) {
376     if (ilu->colorOf[i]==color) {
377     /* x_i=x_i-a_ik*x_k */
378     S1=x[i];
379     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
380     k=ilu->pattern->index[iptr_ik];
381     if (ilu->colorOf[k]<color) {
382     R1=x[k];
383     S1-=ilu->factors[iptr_ik]*R1;
384     }
385     }
386     iptr_main=ilu->main_iptr[i];
387     x[i]=ilu->factors[iptr_main]*S1;
388     }
389 jgs 150 }
390 gross 431 } else if (n_block==2) {
391 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
392 gross 431 for (i = 0; i < n; ++i) {
393     if (ilu->colorOf[i]==color) {
394     /* x_i=x_i-a_ik*x_k */
395     S1=x[2*i];
396     S2=x[2*i+1];
397     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
398     k=ilu->pattern->index[iptr_ik];
399     if (ilu->colorOf[k]<color) {
400     R1=x[2*k];
401     R2=x[2*k+1];
402     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
403     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
404     }
405     }
406     iptr_main=ilu->main_iptr[i];
407     x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
408     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
409     }
410    
411     }
412     } else if (n_block==3) {
413 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
414 gross 431 for (i = 0; i < n; ++i) {
415     if (ilu->colorOf[i]==color) {
416     /* x_i=x_i-a_ik*x_k */
417     S1=x[3*i];
418     S2=x[3*i+1];
419     S3=x[3*i+2];
420     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
421     k=ilu->pattern->index[iptr_ik];
422     if (ilu->colorOf[k]<color) {
423     R1=x[3*k];
424     R2=x[3*k+1];
425     R3=x[3*k+2];
426     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
427     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
428     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
429     }
430     }
431     iptr_main=ilu->main_iptr[i];
432     x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
433     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
434     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
435 jgs 150 }
436 gross 431 }
437 jgs 150 }
438 gross 431 #pragma omp barrier
439 jgs 150 }
440 gross 431 /* backward substitution */
441     for (color=(ilu->num_colors)-1;color>-1;--color) {
442     if (n_block==1) {
443 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1)
444 gross 431 for (i = 0; i < n; ++i) {
445     if (ilu->colorOf[i]==color) {
446     /* x_i=x_i-a_ik*x_k */
447     S1=x[i];
448     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
449     k=ilu->pattern->index[iptr_ik];
450     if (ilu->colorOf[k]>color) {
451     R1=x[k];
452     S1-=ilu->factors[iptr_ik]*R1;
453     }
454     }
455     x[i]=S1;
456     }
457     }
458     } else if (n_block==2) {
459 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
460 gross 431 for (i = 0; i < n; ++i) {
461     if (ilu->colorOf[i]==color) {
462     /* x_i=x_i-a_ik*x_k */
463     S1=x[2*i];
464     S2=x[2*i+1];
465     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
466     k=ilu->pattern->index[iptr_ik];
467     if (ilu->colorOf[k]>color) {
468     R1=x[2*k];
469     R2=x[2*k+1];
470     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
471     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
472     }
473     }
474     x[2*i]=S1;
475     x[2*i+1]=S2;
476     }
477     }
478     } else if (n_block==3) {
479 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
480 gross 431 for (i = 0; i < n; ++i) {
481     if (ilu->colorOf[i]==color) {
482     /* x_i=x_i-a_ik*x_k */
483     S1=x[3*i ];
484     S2=x[3*i+1];
485     S3=x[3*i+2];
486     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
487     k=ilu->pattern->index[iptr_ik];
488     if (ilu->colorOf[k]>color) {
489     R1=x[3*k];
490     R2=x[3*k+1];
491     R3=x[3*k+2];
492     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
493     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
494     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
495     }
496     }
497     x[3*i]=S1;
498     x[3*i+1]=S2;
499     x[3*i+2]=S3;
500     }
501     }
502     }
503     #pragma omp barrier
504     }
505 jgs 150 return;
506     }
507     /*
508     * $Log$
509     * Revision 1.2 2005/09/15 03:44:40 jgs
510     * Merge of development branch dev-02 back to main trunk on 2005-09-15
511     *
512     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
513     * These files have been extracted from finley to define a stand alone libray for iterative
514     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
515     * has not been tested yet.
516     *
517     *
518     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26