/[escript]/trunk/paso/src/ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1312 - (hide annotations)
Mon Sep 24 06:18:44 2007 UTC (12 years, 6 months ago) by ksteube
Original Path: trunk/paso/src/Solver_ILU.c
File MIME type: text/plain
File size: 25680 byte(s)
The MPI branch is hereby closed. All future work should be in trunk.

Previously in revision 1295 I merged the latest changes to trunk into trunk-mpi-branch.
In this revision I copied all files from trunk-mpi-branch over the corresponding
trunk files. I did not use 'svn merge', it was a copy.

1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16 jgs 150 /**************************************************************/
17    
18     /* Paso: ILU preconditioner with reordering */
19    
20     /**************************************************************/
21    
22     /* Copyrights by ACcESS Australia 2003,2004,2005 */
23     /* Author: gross@access.edu.au */
24    
25     /**************************************************************/
26    
27 gross 700 #include "Paso.h"
28 jgs 150 #include "Solver.h"
29 gross 700 #include "PasoUtil.h"
30 jgs 150
31     /**************************************************************/
32    
33     /* free all memory used by ILU */
34    
35     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
36     if (in!=NULL) {
37 gross 431 MEMFREE(in->colorOf);
38     MEMFREE(in->factors);
39     MEMFREE(in->main_iptr);
40 ksteube 1312 Paso_Pattern_free(in->pattern);
41 jgs 150 MEMFREE(in);
42     }
43     }
44    
45     /**************************************************************/
46    
47 gross 431 /* constructs the incomplete block factorization of
48 jgs 150
49     */
50 ksteube 1312 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SparseMatrix * A,bool_t verbose) {
51     dim_t n=A->numRows;
52 gross 431 dim_t n_block=A->row_block_size;
53 gross 1028 index_t num_colors=0, *mis_marker=NULL;
54 gross 431 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
55     register double mainA11,mainA12,mainA13,mainA21,mainA22,mainA23,mainA31,mainA32,mainA33;
56     register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
57     register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
58     double time0,time_color,time_fac;
59     /* allocations: */
60     Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
61     if (Paso_checkPtr(out)) return NULL;
62 gross 1028 mis_marker=TMPMEMALLOC(n,index_t);
63 gross 431 out->colorOf=MEMALLOC(n,index_t);
64     out->factors=MEMALLOC(A->len,double);
65 gross 432 out->main_iptr=MEMALLOC(n,index_t);
66 ksteube 1312 out->pattern=Paso_Pattern_getReference(A->pattern);
67 gross 431 out->n_block=n_block;
68     out->n=n;
69     time0=Paso_timer();
70     if ( !(Paso_checkPtr(mis_marker) ||
71     Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
72     /* get coloring */
73     index_t num_colors=0;
74     #pragma omp parallel for private(i) schedule(static)
75     for (i = 0; i < n; ++i) out->colorOf[i]=-1;
76     while (Paso_Util_isAny(n,out->colorOf,-1) && Paso_noError()) {
77     #pragma omp parallel for private(i) schedule(static)
78     for (i = 0; i < n; ++i) mis_marker[i]=out->colorOf[i];
79 ksteube 1312 Paso_Pattern_mis(A->pattern,mis_marker);
80 jgs 150
81 gross 431 #pragma omp parallel for private(i) schedule(static)
82     for (i = 0; i < n; ++i) if (mis_marker[i]) out->colorOf[i]=num_colors;
83     ++num_colors;
84     }
85     out->num_colors=num_colors;
86     time_color=Paso_timer()-time0;
87     time0=Paso_timer();
88     /* find main diagonal and copy matrix values */
89 gross 432 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
90 gross 431 for (i = 0; i < n; ++i) {
91     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
92     iptr_main=A->pattern->ptr[0]-1;
93     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
94     if (A->pattern->index[iptr]==i) iptr_main=iptr;
95     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
96     }
97     out->main_iptr[i]=iptr_main;
98     if (iptr_main==A->pattern->ptr[0]-1)
99     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
100     }
101     }
102     /* start factorization */
103 jgs 150
104 gross 432 #pragma omp barrier
105 gross 431 for (color=0;color<out->num_colors && Paso_noError();++color) {
106     if (n_block==1) {
107 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
108 jgs 150 for (i = 0; i < n; ++i) {
109 gross 431 if (out->colorOf[i]==color) {
110     for (color2=0;color2<color;++color2) {
111     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
112     k=A->pattern->index[iptr_ik];
113     if (out->colorOf[k]==color2) {
114     A11=out->factors[iptr_ik];
115     /* a_ij=a_ij-a_ik*a_kj */
116     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
117     j=A->pattern->index[iptr_kj];
118     if (out->colorOf[j]>color2) {
119     S11=out->factors[iptr_kj];
120     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
121     if (j==A->pattern->index[iptr_ij]) {
122     out->factors[iptr_ij]-=A11*S11;
123     break;
124     }
125     }
126     }
127     }
128     }
129     }
130     }
131     iptr_main=out->main_iptr[i];
132     D=out->factors[iptr_main];
133     if (ABS(D)>0.) {
134     D=1./D;
135     out->factors[iptr_main]=D;
136     /* a_ik=a_ii^{-1}*a_ik */
137     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
138     k=A->pattern->index[iptr_ik];
139     if (out->colorOf[k]>color) {
140     A11=out->factors[iptr_ik];
141     out->factors[iptr_ik]=A11*D;
142     }
143     }
144     } else {
145     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
146     }
147 jgs 150 }
148     }
149 gross 431 } else if (n_block==2) {
150 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
151 gross 431 for (i = 0; i < n; ++i) {
152     if (out->colorOf[i]==color) {
153     for (color2=0;color2<color;++color2) {
154     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
155     k=A->pattern->index[iptr_ik];
156     if (out->colorOf[k]==color2) {
157     A11=out->factors[iptr_ik*4 ];
158     A21=out->factors[iptr_ik*4+1];
159     A12=out->factors[iptr_ik*4+2];
160     A22=out->factors[iptr_ik*4+3];
161     /* a_ij=a_ij-a_ik*a_kj */
162     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
163     j=A->pattern->index[iptr_kj];
164     if (out->colorOf[j]>color2) {
165     S11=out->factors[iptr_kj*4];
166     S21=out->factors[iptr_kj*4+1];
167     S12=out->factors[iptr_kj*4+2];
168     S22=out->factors[iptr_kj*4+3];
169     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
170     if (j==A->pattern->index[iptr_ij]) {
171     out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
172     out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
173     out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
174     out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
175     break;
176     }
177     }
178     }
179     }
180     }
181 jgs 150 }
182 gross 431 }
183     iptr_main=out->main_iptr[i];
184     A11=out->factors[iptr_main*4];
185     A21=out->factors[iptr_main*4+1];
186     A12=out->factors[iptr_main*4+2];
187     A22=out->factors[iptr_main*4+3];
188     D = A11*A22-A12*A21;
189     if (ABS(D)>0.) {
190     D=1./D;
191     S11= A22*D;
192     S21=-A21*D;
193     S12=-A12*D;
194     S22= A11*D;
195     out->factors[iptr_main*4] = S11;
196     out->factors[iptr_main*4+1]= S21;
197     out->factors[iptr_main*4+2]= S12;
198     out->factors[iptr_main*4+3]= S22;
199     /* a_ik=a_ii^{-1}*a_ik */
200     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
201     k=A->pattern->index[iptr_ik];
202     if (out->colorOf[k]>color) {
203     A11=out->factors[iptr_ik*4 ];
204     A21=out->factors[iptr_ik*4+1];
205     A12=out->factors[iptr_ik*4+2];
206     A22=out->factors[iptr_ik*4+3];
207     out->factors[4*iptr_ik ]=S11*A11+S12*A21;
208     out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
209     out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
210     out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
211     }
212 jgs 150 }
213 gross 431 } else {
214     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
215     }
216     }
217 jgs 150 }
218 gross 431 } else if (n_block==3) {
219 gross 432 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
220 gross 431 for (i = 0; i < n; ++i) {
221     if (out->colorOf[i]==color) {
222     for (color2=0;color2<color;++color2) {
223     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
224     k=A->pattern->index[iptr_ik];
225     if (out->colorOf[k]==color2) {
226     A11=out->factors[iptr_ik*9 ];
227     A21=out->factors[iptr_ik*9+1];
228     A31=out->factors[iptr_ik*9+2];
229     A12=out->factors[iptr_ik*9+3];
230     A22=out->factors[iptr_ik*9+4];
231     A32=out->factors[iptr_ik*9+5];
232     A13=out->factors[iptr_ik*9+6];
233     A23=out->factors[iptr_ik*9+7];
234     A33=out->factors[iptr_ik*9+8];
235     /* a_ij=a_ij-a_ik*a_kj */
236     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
237     j=A->pattern->index[iptr_kj];
238     if (out->colorOf[j]>color2) {
239     S11=out->factors[iptr_kj*9 ];
240     S21=out->factors[iptr_kj*9+1];
241     S31=out->factors[iptr_kj*9+2];
242     S12=out->factors[iptr_kj*9+3];
243     S22=out->factors[iptr_kj*9+4];
244     S32=out->factors[iptr_kj*9+5];
245     S13=out->factors[iptr_kj*9+6];
246     S23=out->factors[iptr_kj*9+7];
247     S33=out->factors[iptr_kj*9+8];
248     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
249     if (j==A->pattern->index[iptr_ij]) {
250     out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
251     out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
252     out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
253     out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
254     out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
255     out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
256     out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
257     out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
258     out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
259     break;
260     }
261     }
262     }
263 jgs 150 }
264     }
265 gross 431 }
266     }
267     iptr_main=out->main_iptr[i];
268     A11=out->factors[iptr_main*9 ];
269     A21=out->factors[iptr_main*9+1];
270     A31=out->factors[iptr_main*9+2];
271     A12=out->factors[iptr_main*9+3];
272     A22=out->factors[iptr_main*9+4];
273     A32=out->factors[iptr_main*9+5];
274     A13=out->factors[iptr_main*9+6];
275     A23=out->factors[iptr_main*9+7];
276     A33=out->factors[iptr_main*9+8];
277     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
278     if (ABS(D)>0.) {
279     D=1./D;
280     S11=(A22*A33-A23*A32)*D;
281     S21=(A31*A23-A21*A33)*D;
282     S31=(A21*A32-A31*A22)*D;
283     S12=(A13*A32-A12*A33)*D;
284     S22=(A11*A33-A31*A13)*D;
285     S32=(A12*A31-A11*A32)*D;
286     S13=(A12*A23-A13*A22)*D;
287     S23=(A13*A21-A11*A23)*D;
288     S33=(A11*A22-A12*A21)*D;
289    
290     out->factors[iptr_main*9 ]=S11;
291     out->factors[iptr_main*9+1]=S21;
292     out->factors[iptr_main*9+2]=S31;
293     out->factors[iptr_main*9+3]=S12;
294     out->factors[iptr_main*9+4]=S22;
295     out->factors[iptr_main*9+5]=S32;
296     out->factors[iptr_main*9+6]=S13;
297     out->factors[iptr_main*9+7]=S23;
298     out->factors[iptr_main*9+8]=S33;
299    
300     /* a_ik=a_ii^{-1}*a_ik */
301     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
302     k=A->pattern->index[iptr_ik];
303     if (out->colorOf[k]>color) {
304     A11=out->factors[iptr_ik*9 ];
305     A21=out->factors[iptr_ik*9+1];
306     A31=out->factors[iptr_ik*9+2];
307     A12=out->factors[iptr_ik*9+3];
308     A22=out->factors[iptr_ik*9+4];
309     A32=out->factors[iptr_ik*9+5];
310     A13=out->factors[iptr_ik*9+6];
311     A23=out->factors[iptr_ik*9+7];
312     A33=out->factors[iptr_ik*9+8];
313     out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
314     out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
315     out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
316     out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
317     out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
318     out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
319     out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
320     out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
321     out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
322     }
323     }
324     } else {
325     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
326     }
327 jgs 150 }
328     }
329 gross 431 } else {
330     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
331     }
332 gross 432 #pragma omp barrier
333 jgs 150 }
334 gross 431 time_fac=Paso_timer()-time0;
335 jgs 150 }
336 gross 431
337 jgs 150 TMPMEMFREE(mis_marker);
338     if (Paso_noError()) {
339     if (verbose) {
340 gross 431 printf("ILU: %d color used \n",out->num_colors);
341     printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
342 jgs 150 }
343     return out;
344     } else {
345     Paso_Solver_ILU_free(out);
346     return NULL;
347     }
348     }
349    
350     /**************************************************************/
351    
352     /* apply ILU precondition b-> x
353    
354 gross 431 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
355 jgs 150
356     should be called within a parallel region
357     barrier synconization should be performed to make sure that the input vector available
358    
359     */
360    
361     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
362 gross 431 register dim_t i,k;
363     register index_t color,iptr_ik,iptr_main;
364     register double S1,S2,S3,R1,R2,R3;
365 jgs 150 dim_t n_block=ilu->n_block;
366 gross 431 dim_t n=ilu->n;
367 jgs 150
368 gross 431
369     /* copy x into b*/
370     #pragma omp for private(i) schedule(static)
371     for (i=0;i<n*n_block;++i) x[i]=b[i];
372     /* forward substitution */
373     for (color=0;color<ilu->num_colors;++color) {
374     if (n_block==1) {
375 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
376 gross 431 for (i = 0; i < n; ++i) {
377     if (ilu->colorOf[i]==color) {
378     /* x_i=x_i-a_ik*x_k */
379     S1=x[i];
380     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
381     k=ilu->pattern->index[iptr_ik];
382     if (ilu->colorOf[k]<color) {
383     R1=x[k];
384     S1-=ilu->factors[iptr_ik]*R1;
385     }
386     }
387     iptr_main=ilu->main_iptr[i];
388     x[i]=ilu->factors[iptr_main]*S1;
389     }
390 jgs 150 }
391 gross 431 } else if (n_block==2) {
392 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
393 gross 431 for (i = 0; i < n; ++i) {
394     if (ilu->colorOf[i]==color) {
395     /* x_i=x_i-a_ik*x_k */
396     S1=x[2*i];
397     S2=x[2*i+1];
398     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
399     k=ilu->pattern->index[iptr_ik];
400     if (ilu->colorOf[k]<color) {
401     R1=x[2*k];
402     R2=x[2*k+1];
403     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
404     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
405     }
406     }
407     iptr_main=ilu->main_iptr[i];
408     x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
409     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
410     }
411    
412     }
413     } else if (n_block==3) {
414 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
415 gross 431 for (i = 0; i < n; ++i) {
416     if (ilu->colorOf[i]==color) {
417     /* x_i=x_i-a_ik*x_k */
418     S1=x[3*i];
419     S2=x[3*i+1];
420     S3=x[3*i+2];
421     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
422     k=ilu->pattern->index[iptr_ik];
423     if (ilu->colorOf[k]<color) {
424     R1=x[3*k];
425     R2=x[3*k+1];
426     R3=x[3*k+2];
427     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
428     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
429     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
430     }
431     }
432     iptr_main=ilu->main_iptr[i];
433     x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
434     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
435     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
436 jgs 150 }
437 gross 431 }
438 jgs 150 }
439 gross 431 #pragma omp barrier
440 jgs 150 }
441 gross 431 /* backward substitution */
442     for (color=(ilu->num_colors)-1;color>-1;--color) {
443     if (n_block==1) {
444 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1)
445 gross 431 for (i = 0; i < n; ++i) {
446     if (ilu->colorOf[i]==color) {
447     /* x_i=x_i-a_ik*x_k */
448     S1=x[i];
449     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
450     k=ilu->pattern->index[iptr_ik];
451     if (ilu->colorOf[k]>color) {
452     R1=x[k];
453     S1-=ilu->factors[iptr_ik]*R1;
454     }
455     }
456     x[i]=S1;
457     }
458     }
459     } else if (n_block==2) {
460 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
461 gross 431 for (i = 0; i < n; ++i) {
462     if (ilu->colorOf[i]==color) {
463     /* x_i=x_i-a_ik*x_k */
464     S1=x[2*i];
465     S2=x[2*i+1];
466     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
467     k=ilu->pattern->index[iptr_ik];
468     if (ilu->colorOf[k]>color) {
469     R1=x[2*k];
470     R2=x[2*k+1];
471     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
472     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
473     }
474     }
475     x[2*i]=S1;
476     x[2*i+1]=S2;
477     }
478     }
479     } else if (n_block==3) {
480 gross 432 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
481 gross 431 for (i = 0; i < n; ++i) {
482     if (ilu->colorOf[i]==color) {
483     /* x_i=x_i-a_ik*x_k */
484     S1=x[3*i ];
485     S2=x[3*i+1];
486     S3=x[3*i+2];
487     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
488     k=ilu->pattern->index[iptr_ik];
489     if (ilu->colorOf[k]>color) {
490     R1=x[3*k];
491     R2=x[3*k+1];
492     R3=x[3*k+2];
493     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
494     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
495     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
496     }
497     }
498     x[3*i]=S1;
499     x[3*i+1]=S2;
500     x[3*i+2]=S3;
501     }
502     }
503     }
504     #pragma omp barrier
505     }
506 jgs 150 return;
507     }
508     /*
509     * $Log$
510     * Revision 1.2 2005/09/15 03:44:40 jgs
511     * Merge of development branch dev-02 back to main trunk on 2005-09-15
512     *
513     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
514     * These files have been extracted from finley to define a stand alone libray for iterative
515     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
516     * has not been tested yet.
517     *
518     *
519     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26