/[escript]/trunk/paso/src/Solvers/Solver_ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solvers/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (hide annotations)
Mon Mar 27 02:43:09 2006 UTC (14 years, 2 months ago) by robwdcock
File MIME type: text/plain
File size: 25932 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: ILU preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26 robwdcock 682 #include "../Paso.h"
27 jgs 150 #include "Solver.h"
28 robwdcock 682 #include "../PasoUtil.h"
29 jgs 150
30     /**************************************************************/
31    
32     /* free all memory used by ILU */
33    
34     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
35     if (in!=NULL) {
36 gross 431 MEMFREE(in->colorOf);
37     MEMFREE(in->factors);
38     MEMFREE(in->main_iptr);
39     Paso_SystemMatrixPattern_dealloc(in->pattern);
40 jgs 150 MEMFREE(in);
41     }
42     }
43    
44     /**************************************************************/
45    
46 gross 431 /* constructs the incomplete block factorization of
47 jgs 150
48     */
49 gross 431 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A,bool_t verbose) {
50     dim_t n=A->num_rows;
51     dim_t n_block=A->row_block_size;
52     index_t num_colors=0;
53     register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
54     register double mainA11,mainA12,mainA13,mainA21,mainA22,mainA23,mainA31,mainA32,mainA33;
55     register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
56     register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
57     double time0,time_color,time_fac;
58     /* allocations: */
59     Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
60     if (Paso_checkPtr(out)) return NULL;
61     index_t* mis_marker=TMPMEMALLOC(n,index_t);
62     out->colorOf=MEMALLOC(n,index_t);
63     out->factors=MEMALLOC(A->len,double);
64 gross 432 out->main_iptr=MEMALLOC(n,index_t);
65 gross 431 out->pattern=Paso_SystemMatrixPattern_reference(A->pattern);
66     out->n_block=n_block;
67     out->n=n;
68     time0=Paso_timer();
69     if ( !(Paso_checkPtr(mis_marker) ||
70     Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71     /* get coloring */
72     index_t num_colors=0;
73     #pragma omp parallel for private(i) schedule(static)
74     for (i = 0; i < n; ++i) out->colorOf[i]=-1;
75     while (Paso_Util_isAny(n,out->colorOf,-1) && Paso_noError()) {
76     #pragma omp parallel for private(i) schedule(static)
77     for (i = 0; i < n; ++i) mis_marker[i]=out->colorOf[i];
78     Paso_SystemMatrixPattern_mis(A->pattern,mis_marker);
79 jgs 150
80 gross 431 #pragma omp parallel for private(i) schedule(static)
81     for (i = 0; i < n; ++i) if (mis_marker[i]) out->colorOf[i]=num_colors;
82     ++num_colors;
83     }
84     out->num_colors=num_colors;
85     time_color=Paso_timer()-time0;
86     time0=Paso_timer();
87     /* find main diagonal and copy matrix values */
88 gross 432 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
89 gross 431 for (i = 0; i < n; ++i) {
90     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
91     iptr_main=A->pattern->ptr[0]-1;
92     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
93     if (A->pattern->index[iptr]==i) iptr_main=iptr;
94     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
95     }
96     out->main_iptr[i]=iptr_main;
97     if (iptr_main==A->pattern->ptr[0]-1)
98     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
99     }
100     }
101     /* start factorization */
102 jgs 150
103 gross 432 #pragma omp barrier
104 gross 431 for (color=0;color<out->num_colors && Paso_noError();++color) {
105     if (n_block==1) {
106 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
107 jgs 150 for (i = 0; i < n; ++i) {
108 gross 431 if (out->colorOf[i]==color) {
109     for (color2=0;color2<color;++color2) {
110     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
111     k=A->pattern->index[iptr_ik];
112     if (out->colorOf[k]==color2) {
113     A11=out->factors[iptr_ik];
114     /* a_ij=a_ij-a_ik*a_kj */
115     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
116     j=A->pattern->index[iptr_kj];
117     if (out->colorOf[j]>color2) {
118     S11=out->factors[iptr_kj];
119     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
120     if (j==A->pattern->index[iptr_ij]) {
121     out->factors[iptr_ij]-=A11*S11;
122     break;
123     }
124     }
125     }
126     }
127     }
128     }
129     }
130     iptr_main=out->main_iptr[i];
131     D=out->factors[iptr_main];
132     if (ABS(D)>0.) {
133     D=1./D;
134     out->factors[iptr_main]=D;
135     /* a_ik=a_ii^{-1}*a_ik */
136     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
137     k=A->pattern->index[iptr_ik];
138     if (out->colorOf[k]>color) {
139     A11=out->factors[iptr_ik];
140     out->factors[iptr_ik]=A11*D;
141     }
142     }
143     } else {
144     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
145     }
146 jgs 150 }
147     }
148 gross 431 } else if (n_block==2) {
149 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
150 gross 431 for (i = 0; i < n; ++i) {
151     if (out->colorOf[i]==color) {
152     for (color2=0;color2<color;++color2) {
153     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
154     k=A->pattern->index[iptr_ik];
155     if (out->colorOf[k]==color2) {
156     A11=out->factors[iptr_ik*4 ];
157     A21=out->factors[iptr_ik*4+1];
158     A12=out->factors[iptr_ik*4+2];
159     A22=out->factors[iptr_ik*4+3];
160     /* a_ij=a_ij-a_ik*a_kj */
161     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
162     j=A->pattern->index[iptr_kj];
163     if (out->colorOf[j]>color2) {
164     S11=out->factors[iptr_kj*4];
165     S21=out->factors[iptr_kj*4+1];
166     S12=out->factors[iptr_kj*4+2];
167     S22=out->factors[iptr_kj*4+3];
168     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
169     if (j==A->pattern->index[iptr_ij]) {
170     out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
171     out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
172     out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
173     out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
174     break;
175     }
176     }
177     }
178     }
179     }
180 jgs 150 }
181 gross 431 }
182     iptr_main=out->main_iptr[i];
183     A11=out->factors[iptr_main*4];
184     A21=out->factors[iptr_main*4+1];
185     A12=out->factors[iptr_main*4+2];
186     A22=out->factors[iptr_main*4+3];
187     D = A11*A22-A12*A21;
188     if (ABS(D)>0.) {
189     D=1./D;
190     S11= A22*D;
191     S21=-A21*D;
192     S12=-A12*D;
193     S22= A11*D;
194     out->factors[iptr_main*4] = S11;
195     out->factors[iptr_main*4+1]= S21;
196     out->factors[iptr_main*4+2]= S12;
197     out->factors[iptr_main*4+3]= S22;
198     /* a_ik=a_ii^{-1}*a_ik */
199     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
200     k=A->pattern->index[iptr_ik];
201     if (out->colorOf[k]>color) {
202     A11=out->factors[iptr_ik*4 ];
203     A21=out->factors[iptr_ik*4+1];
204     A12=out->factors[iptr_ik*4+2];
205     A22=out->factors[iptr_ik*4+3];
206     out->factors[4*iptr_ik ]=S11*A11+S12*A21;
207     out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
208     out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
209     out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
210     }
211 jgs 150 }
212 gross 431 } else {
213     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
214     }
215     }
216 jgs 150 }
217 gross 431 } else if (n_block==3) {
218 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
219 gross 431 for (i = 0; i < n; ++i) {
220     if (out->colorOf[i]==color) {
221     for (color2=0;color2<color;++color2) {
222     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
223     k=A->pattern->index[iptr_ik];
224     if (out->colorOf[k]==color2) {
225     A11=out->factors[iptr_ik*9 ];
226     A21=out->factors[iptr_ik*9+1];
227     A31=out->factors[iptr_ik*9+2];
228     A12=out->factors[iptr_ik*9+3];
229     A22=out->factors[iptr_ik*9+4];
230     A32=out->factors[iptr_ik*9+5];
231     A13=out->factors[iptr_ik*9+6];
232     A23=out->factors[iptr_ik*9+7];
233     A33=out->factors[iptr_ik*9+8];
234     /* a_ij=a_ij-a_ik*a_kj */
235     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
236     j=A->pattern->index[iptr_kj];
237     if (out->colorOf[j]>color2) {
238     S11=out->factors[iptr_kj*9 ];
239     S21=out->factors[iptr_kj*9+1];
240     S31=out->factors[iptr_kj*9+2];
241     S12=out->factors[iptr_kj*9+3];
242     S22=out->factors[iptr_kj*9+4];
243     S32=out->factors[iptr_kj*9+5];
244     S13=out->factors[iptr_kj*9+6];
245     S23=out->factors[iptr_kj*9+7];
246     S33=out->factors[iptr_kj*9+8];
247     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
248     if (j==A->pattern->index[iptr_ij]) {
249     out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
250     out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
251     out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
252     out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
253     out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
254     out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
255     out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
256     out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
257     out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
258     break;
259     }
260     }
261     }
262 jgs 150 }
263     }
264 gross 431 }
265     }
266     iptr_main=out->main_iptr[i];
267     A11=out->factors[iptr_main*9 ];
268     A21=out->factors[iptr_main*9+1];
269     A31=out->factors[iptr_main*9+2];
270     A12=out->factors[iptr_main*9+3];
271     A22=out->factors[iptr_main*9+4];
272     A32=out->factors[iptr_main*9+5];
273     A13=out->factors[iptr_main*9+6];
274     A23=out->factors[iptr_main*9+7];
275     A33=out->factors[iptr_main*9+8];
276     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
277     if (ABS(D)>0.) {
278     D=1./D;
279     S11=(A22*A33-A23*A32)*D;
280     S21=(A31*A23-A21*A33)*D;
281     S31=(A21*A32-A31*A22)*D;
282     S12=(A13*A32-A12*A33)*D;
283     S22=(A11*A33-A31*A13)*D;
284     S32=(A12*A31-A11*A32)*D;
285     S13=(A12*A23-A13*A22)*D;
286     S23=(A13*A21-A11*A23)*D;
287     S33=(A11*A22-A12*A21)*D;
288    
289     out->factors[iptr_main*9 ]=S11;
290     out->factors[iptr_main*9+1]=S21;
291     out->factors[iptr_main*9+2]=S31;
292     out->factors[iptr_main*9+3]=S12;
293     out->factors[iptr_main*9+4]=S22;
294     out->factors[iptr_main*9+5]=S32;
295     out->factors[iptr_main*9+6]=S13;
296     out->factors[iptr_main*9+7]=S23;
297     out->factors[iptr_main*9+8]=S33;
298    
299     /* a_ik=a_ii^{-1}*a_ik */
300     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
301     k=A->pattern->index[iptr_ik];
302     if (out->colorOf[k]>color) {
303     A11=out->factors[iptr_ik*9 ];
304     A21=out->factors[iptr_ik*9+1];
305     A31=out->factors[iptr_ik*9+2];
306     A12=out->factors[iptr_ik*9+3];
307     A22=out->factors[iptr_ik*9+4];
308     A32=out->factors[iptr_ik*9+5];
309     A13=out->factors[iptr_ik*9+6];
310     A23=out->factors[iptr_ik*9+7];
311     A33=out->factors[iptr_ik*9+8];
312     out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
313     out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
314     out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
315     out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
316     out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
317     out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
318     out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
319     out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
320     out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
321     }
322     }
323     } else {
324     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
325     }
326 jgs 150 }
327     }
328 gross 431 } else {
329     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
330     }
331 gross 432 #pragma omp barrier
332 jgs 150 }
333 gross 431 time_fac=Paso_timer()-time0;
334 jgs 150 }
335 gross 431
336 jgs 150 TMPMEMFREE(mis_marker);
337     if (Paso_noError()) {
338     if (verbose) {
339 gross 431 printf("ILU: %d color used \n",out->num_colors);
340     printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
341 jgs 150 }
342     return out;
343     } else {
344     Paso_Solver_ILU_free(out);
345     return NULL;
346     }
347     }
348    
349     /**************************************************************/
350    
351     /* apply ILU precondition b-> x
352    
353 gross 431 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
354 jgs 150
355     should be called within a parallel region
356     barrier synconization should be performed to make sure that the input vector available
357    
358     */
359    
360     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
361 gross 431 register dim_t i,k;
362     register index_t color,iptr_ik,iptr_main;
363     register double S1,S2,S3,R1,R2,R3;
364 jgs 150 dim_t n_block=ilu->n_block;
365 gross 431 dim_t n=ilu->n;
366 jgs 150
367 gross 431
368     /* copy x into b*/
369     #pragma omp for private(i) schedule(static)
370     for (i=0;i<n*n_block;++i) x[i]=b[i];
371     /* forward substitution */
372     for (color=0;color<ilu->num_colors;++color) {
373     if (n_block==1) {
374 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
375 gross 431 for (i = 0; i < n; ++i) {
376     if (ilu->colorOf[i]==color) {
377     /* x_i=x_i-a_ik*x_k */
378     S1=x[i];
379     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
380     k=ilu->pattern->index[iptr_ik];
381     if (ilu->colorOf[k]<color) {
382     R1=x[k];
383     S1-=ilu->factors[iptr_ik]*R1;
384     }
385     }
386     iptr_main=ilu->main_iptr[i];
387     x[i]=ilu->factors[iptr_main]*S1;
388     }
389 jgs 150 }
390 gross 431 } else if (n_block==2) {
391 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
392 gross 431 for (i = 0; i < n; ++i) {
393     if (ilu->colorOf[i]==color) {
394     /* x_i=x_i-a_ik*x_k */
395     S1=x[2*i];
396     S2=x[2*i+1];
397     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
398     k=ilu->pattern->index[iptr_ik];
399     if (ilu->colorOf[k]<color) {
400     R1=x[2*k];
401     R2=x[2*k+1];
402     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
403     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
404     }
405     }
406     iptr_main=ilu->main_iptr[i];
407     x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
408     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
409     }
410    
411     }
412     } else if (n_block==3) {
413 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
414 gross 431 for (i = 0; i < n; ++i) {
415     if (ilu->colorOf[i]==color) {
416     /* x_i=x_i-a_ik*x_k */
417     S1=x[3*i];
418     S2=x[3*i+1];
419     S3=x[3*i+2];
420     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
421     k=ilu->pattern->index[iptr_ik];
422     if (ilu->colorOf[k]<color) {
423     R1=x[3*k];
424     R2=x[3*k+1];
425     R3=x[3*k+2];
426     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
427     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
428     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
429     }
430     }
431     iptr_main=ilu->main_iptr[i];
432     x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
433     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
434     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
435 jgs 150 }
436 gross 431 }
437 jgs 150 }
438 gross 431 #pragma omp barrier
439 jgs 150 }
440 gross 431 /* backward substitution */
441     for (color=(ilu->num_colors)-1;color>-1;--color) {
442     if (n_block==1) {
443 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1)
444 gross 431 for (i = 0; i < n; ++i) {
445     if (ilu->colorOf[i]==color) {
446     /* x_i=x_i-a_ik*x_k */
447     S1=x[i];
448     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
449     k=ilu->pattern->index[iptr_ik];
450     if (ilu->colorOf[k]>color) {
451     R1=x[k];
452     S1-=ilu->factors[iptr_ik]*R1;
453     }
454     }
455     x[i]=S1;
456     }
457     }
458     } else if (n_block==2) {
459 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
460 gross 431 for (i = 0; i < n; ++i) {
461     if (ilu->colorOf[i]==color) {
462     /* x_i=x_i-a_ik*x_k */
463     S1=x[2*i];
464     S2=x[2*i+1];
465     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
466     k=ilu->pattern->index[iptr_ik];
467     if (ilu->colorOf[k]>color) {
468     R1=x[2*k];
469     R2=x[2*k+1];
470     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
471     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
472     }
473     }
474     x[2*i]=S1;
475     x[2*i+1]=S2;
476     }
477     }
478     } else if (n_block==3) {
479 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
480 gross 431 for (i = 0; i < n; ++i) {
481     if (ilu->colorOf[i]==color) {
482     /* x_i=x_i-a_ik*x_k */
483     S1=x[3*i ];
484     S2=x[3*i+1];
485     S3=x[3*i+2];
486     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
487     k=ilu->pattern->index[iptr_ik];
488     if (ilu->colorOf[k]>color) {
489     R1=x[3*k];
490     R2=x[3*k+1];
491     R3=x[3*k+2];
492     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
493     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
494     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
495     }
496     }
497     x[3*i]=S1;
498     x[3*i+1]=S2;
499     x[3*i+2]=S3;
500     }
501     }
502     }
503     #pragma omp barrier
504     }
505 jgs 150 return;
506     }
507     /*
508     * $Log$
509     * Revision 1.2 2005/09/15 03:44:40 jgs
510     * Merge of development branch dev-02 back to main trunk on 2005-09-15
511     *
512     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
513     * These files have been extracted from finley to define a stand alone libray for iterative
514     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
515     * has not been tested yet.
516     *
517     *
518     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26