/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (show annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 5 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/Solver_ILU.c
File MIME type: text/plain
File size: 25932 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /**************************************************************/
16
17 /* Paso: ILU preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "../Paso.h"
27 #include "Solver.h"
28 #include "../PasoUtil.h"
29
30 /**************************************************************/
31
32 /* free all memory used by ILU */
33
34 void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
35 if (in!=NULL) {
36 MEMFREE(in->colorOf);
37 MEMFREE(in->factors);
38 MEMFREE(in->main_iptr);
39 Paso_SystemMatrixPattern_dealloc(in->pattern);
40 MEMFREE(in);
41 }
42 }
43
44 /**************************************************************/
45
46 /* constructs the incomplete block factorization of
47
48 */
49 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SystemMatrix * A,bool_t verbose) {
50 dim_t n=A->num_rows;
51 dim_t n_block=A->row_block_size;
52 index_t num_colors=0;
53 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
54 register double mainA11,mainA12,mainA13,mainA21,mainA22,mainA23,mainA31,mainA32,mainA33;
55 register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
56 register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
57 double time0,time_color,time_fac;
58 /* allocations: */
59 Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
60 if (Paso_checkPtr(out)) return NULL;
61 index_t* mis_marker=TMPMEMALLOC(n,index_t);
62 out->colorOf=MEMALLOC(n,index_t);
63 out->factors=MEMALLOC(A->len,double);
64 out->main_iptr=MEMALLOC(n,index_t);
65 out->pattern=Paso_SystemMatrixPattern_reference(A->pattern);
66 out->n_block=n_block;
67 out->n=n;
68 time0=Paso_timer();
69 if ( !(Paso_checkPtr(mis_marker) ||
70 Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
71 /* get coloring */
72 index_t num_colors=0;
73 #pragma omp parallel for private(i) schedule(static)
74 for (i = 0; i < n; ++i) out->colorOf[i]=-1;
75 while (Paso_Util_isAny(n,out->colorOf,-1) && Paso_noError()) {
76 #pragma omp parallel for private(i) schedule(static)
77 for (i = 0; i < n; ++i) mis_marker[i]=out->colorOf[i];
78 Paso_SystemMatrixPattern_mis(A->pattern,mis_marker);
79
80 #pragma omp parallel for private(i) schedule(static)
81 for (i = 0; i < n; ++i) if (mis_marker[i]) out->colorOf[i]=num_colors;
82 ++num_colors;
83 }
84 out->num_colors=num_colors;
85 time_color=Paso_timer()-time0;
86 time0=Paso_timer();
87 /* find main diagonal and copy matrix values */
88 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
89 for (i = 0; i < n; ++i) {
90 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
91 iptr_main=A->pattern->ptr[0]-1;
92 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
93 if (A->pattern->index[iptr]==i) iptr_main=iptr;
94 for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
95 }
96 out->main_iptr[i]=iptr_main;
97 if (iptr_main==A->pattern->ptr[0]-1)
98 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
99 }
100 }
101 /* start factorization */
102
103 #pragma omp barrier
104 for (color=0;color<out->num_colors && Paso_noError();++color) {
105 if (n_block==1) {
106 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
107 for (i = 0; i < n; ++i) {
108 if (out->colorOf[i]==color) {
109 for (color2=0;color2<color;++color2) {
110 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
111 k=A->pattern->index[iptr_ik];
112 if (out->colorOf[k]==color2) {
113 A11=out->factors[iptr_ik];
114 /* a_ij=a_ij-a_ik*a_kj */
115 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
116 j=A->pattern->index[iptr_kj];
117 if (out->colorOf[j]>color2) {
118 S11=out->factors[iptr_kj];
119 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
120 if (j==A->pattern->index[iptr_ij]) {
121 out->factors[iptr_ij]-=A11*S11;
122 break;
123 }
124 }
125 }
126 }
127 }
128 }
129 }
130 iptr_main=out->main_iptr[i];
131 D=out->factors[iptr_main];
132 if (ABS(D)>0.) {
133 D=1./D;
134 out->factors[iptr_main]=D;
135 /* a_ik=a_ii^{-1}*a_ik */
136 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
137 k=A->pattern->index[iptr_ik];
138 if (out->colorOf[k]>color) {
139 A11=out->factors[iptr_ik];
140 out->factors[iptr_ik]=A11*D;
141 }
142 }
143 } else {
144 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
145 }
146 }
147 }
148 } else if (n_block==2) {
149 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
150 for (i = 0; i < n; ++i) {
151 if (out->colorOf[i]==color) {
152 for (color2=0;color2<color;++color2) {
153 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
154 k=A->pattern->index[iptr_ik];
155 if (out->colorOf[k]==color2) {
156 A11=out->factors[iptr_ik*4 ];
157 A21=out->factors[iptr_ik*4+1];
158 A12=out->factors[iptr_ik*4+2];
159 A22=out->factors[iptr_ik*4+3];
160 /* a_ij=a_ij-a_ik*a_kj */
161 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
162 j=A->pattern->index[iptr_kj];
163 if (out->colorOf[j]>color2) {
164 S11=out->factors[iptr_kj*4];
165 S21=out->factors[iptr_kj*4+1];
166 S12=out->factors[iptr_kj*4+2];
167 S22=out->factors[iptr_kj*4+3];
168 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
169 if (j==A->pattern->index[iptr_ij]) {
170 out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
171 out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
172 out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
173 out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
174 break;
175 }
176 }
177 }
178 }
179 }
180 }
181 }
182 iptr_main=out->main_iptr[i];
183 A11=out->factors[iptr_main*4];
184 A21=out->factors[iptr_main*4+1];
185 A12=out->factors[iptr_main*4+2];
186 A22=out->factors[iptr_main*4+3];
187 D = A11*A22-A12*A21;
188 if (ABS(D)>0.) {
189 D=1./D;
190 S11= A22*D;
191 S21=-A21*D;
192 S12=-A12*D;
193 S22= A11*D;
194 out->factors[iptr_main*4] = S11;
195 out->factors[iptr_main*4+1]= S21;
196 out->factors[iptr_main*4+2]= S12;
197 out->factors[iptr_main*4+3]= S22;
198 /* a_ik=a_ii^{-1}*a_ik */
199 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
200 k=A->pattern->index[iptr_ik];
201 if (out->colorOf[k]>color) {
202 A11=out->factors[iptr_ik*4 ];
203 A21=out->factors[iptr_ik*4+1];
204 A12=out->factors[iptr_ik*4+2];
205 A22=out->factors[iptr_ik*4+3];
206 out->factors[4*iptr_ik ]=S11*A11+S12*A21;
207 out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
208 out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
209 out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
210 }
211 }
212 } else {
213 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
214 }
215 }
216 }
217 } else if (n_block==3) {
218 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
219 for (i = 0; i < n; ++i) {
220 if (out->colorOf[i]==color) {
221 for (color2=0;color2<color;++color2) {
222 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
223 k=A->pattern->index[iptr_ik];
224 if (out->colorOf[k]==color2) {
225 A11=out->factors[iptr_ik*9 ];
226 A21=out->factors[iptr_ik*9+1];
227 A31=out->factors[iptr_ik*9+2];
228 A12=out->factors[iptr_ik*9+3];
229 A22=out->factors[iptr_ik*9+4];
230 A32=out->factors[iptr_ik*9+5];
231 A13=out->factors[iptr_ik*9+6];
232 A23=out->factors[iptr_ik*9+7];
233 A33=out->factors[iptr_ik*9+8];
234 /* a_ij=a_ij-a_ik*a_kj */
235 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
236 j=A->pattern->index[iptr_kj];
237 if (out->colorOf[j]>color2) {
238 S11=out->factors[iptr_kj*9 ];
239 S21=out->factors[iptr_kj*9+1];
240 S31=out->factors[iptr_kj*9+2];
241 S12=out->factors[iptr_kj*9+3];
242 S22=out->factors[iptr_kj*9+4];
243 S32=out->factors[iptr_kj*9+5];
244 S13=out->factors[iptr_kj*9+6];
245 S23=out->factors[iptr_kj*9+7];
246 S33=out->factors[iptr_kj*9+8];
247 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
248 if (j==A->pattern->index[iptr_ij]) {
249 out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
250 out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
251 out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
252 out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
253 out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
254 out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
255 out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
256 out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
257 out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
258 break;
259 }
260 }
261 }
262 }
263 }
264 }
265 }
266 iptr_main=out->main_iptr[i];
267 A11=out->factors[iptr_main*9 ];
268 A21=out->factors[iptr_main*9+1];
269 A31=out->factors[iptr_main*9+2];
270 A12=out->factors[iptr_main*9+3];
271 A22=out->factors[iptr_main*9+4];
272 A32=out->factors[iptr_main*9+5];
273 A13=out->factors[iptr_main*9+6];
274 A23=out->factors[iptr_main*9+7];
275 A33=out->factors[iptr_main*9+8];
276 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
277 if (ABS(D)>0.) {
278 D=1./D;
279 S11=(A22*A33-A23*A32)*D;
280 S21=(A31*A23-A21*A33)*D;
281 S31=(A21*A32-A31*A22)*D;
282 S12=(A13*A32-A12*A33)*D;
283 S22=(A11*A33-A31*A13)*D;
284 S32=(A12*A31-A11*A32)*D;
285 S13=(A12*A23-A13*A22)*D;
286 S23=(A13*A21-A11*A23)*D;
287 S33=(A11*A22-A12*A21)*D;
288
289 out->factors[iptr_main*9 ]=S11;
290 out->factors[iptr_main*9+1]=S21;
291 out->factors[iptr_main*9+2]=S31;
292 out->factors[iptr_main*9+3]=S12;
293 out->factors[iptr_main*9+4]=S22;
294 out->factors[iptr_main*9+5]=S32;
295 out->factors[iptr_main*9+6]=S13;
296 out->factors[iptr_main*9+7]=S23;
297 out->factors[iptr_main*9+8]=S33;
298
299 /* a_ik=a_ii^{-1}*a_ik */
300 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
301 k=A->pattern->index[iptr_ik];
302 if (out->colorOf[k]>color) {
303 A11=out->factors[iptr_ik*9 ];
304 A21=out->factors[iptr_ik*9+1];
305 A31=out->factors[iptr_ik*9+2];
306 A12=out->factors[iptr_ik*9+3];
307 A22=out->factors[iptr_ik*9+4];
308 A32=out->factors[iptr_ik*9+5];
309 A13=out->factors[iptr_ik*9+6];
310 A23=out->factors[iptr_ik*9+7];
311 A33=out->factors[iptr_ik*9+8];
312 out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
313 out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
314 out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
315 out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
316 out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
317 out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
318 out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
319 out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
320 out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
321 }
322 }
323 } else {
324 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
325 }
326 }
327 }
328 } else {
329 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
330 }
331 #pragma omp barrier
332 }
333 time_fac=Paso_timer()-time0;
334 }
335
336 TMPMEMFREE(mis_marker);
337 if (Paso_noError()) {
338 if (verbose) {
339 printf("ILU: %d color used \n",out->num_colors);
340 printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
341 }
342 return out;
343 } else {
344 Paso_Solver_ILU_free(out);
345 return NULL;
346 }
347 }
348
349 /**************************************************************/
350
351 /* apply ILU precondition b-> x
352
353 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
354
355 should be called within a parallel region
356 barrier synconization should be performed to make sure that the input vector available
357
358 */
359
360 void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
361 register dim_t i,k;
362 register index_t color,iptr_ik,iptr_main;
363 register double S1,S2,S3,R1,R2,R3;
364 dim_t n_block=ilu->n_block;
365 dim_t n=ilu->n;
366
367
368 /* copy x into b*/
369 #pragma omp for private(i) schedule(static)
370 for (i=0;i<n*n_block;++i) x[i]=b[i];
371 /* forward substitution */
372 for (color=0;color<ilu->num_colors;++color) {
373 if (n_block==1) {
374 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
375 for (i = 0; i < n; ++i) {
376 if (ilu->colorOf[i]==color) {
377 /* x_i=x_i-a_ik*x_k */
378 S1=x[i];
379 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
380 k=ilu->pattern->index[iptr_ik];
381 if (ilu->colorOf[k]<color) {
382 R1=x[k];
383 S1-=ilu->factors[iptr_ik]*R1;
384 }
385 }
386 iptr_main=ilu->main_iptr[i];
387 x[i]=ilu->factors[iptr_main]*S1;
388 }
389 }
390 } else if (n_block==2) {
391 #pragma omp for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
392 for (i = 0; i < n; ++i) {
393 if (ilu->colorOf[i]==color) {
394 /* x_i=x_i-a_ik*x_k */
395 S1=x[2*i];
396 S2=x[2*i+1];
397 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
398 k=ilu->pattern->index[iptr_ik];
399 if (ilu->colorOf[k]<color) {
400 R1=x[2*k];
401 R2=x[2*k+1];
402 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
403 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
404 }
405 }
406 iptr_main=ilu->main_iptr[i];
407 x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
408 x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
409 }
410
411 }
412 } else if (n_block==3) {
413 #pragma omp for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
414 for (i = 0; i < n; ++i) {
415 if (ilu->colorOf[i]==color) {
416 /* x_i=x_i-a_ik*x_k */
417 S1=x[3*i];
418 S2=x[3*i+1];
419 S3=x[3*i+2];
420 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
421 k=ilu->pattern->index[iptr_ik];
422 if (ilu->colorOf[k]<color) {
423 R1=x[3*k];
424 R2=x[3*k+1];
425 R3=x[3*k+2];
426 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
427 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
428 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
429 }
430 }
431 iptr_main=ilu->main_iptr[i];
432 x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
433 x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
434 x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
435 }
436 }
437 }
438 #pragma omp barrier
439 }
440 /* backward substitution */
441 for (color=(ilu->num_colors)-1;color>-1;--color) {
442 if (n_block==1) {
443 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,R1)
444 for (i = 0; i < n; ++i) {
445 if (ilu->colorOf[i]==color) {
446 /* x_i=x_i-a_ik*x_k */
447 S1=x[i];
448 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
449 k=ilu->pattern->index[iptr_ik];
450 if (ilu->colorOf[k]>color) {
451 R1=x[k];
452 S1-=ilu->factors[iptr_ik]*R1;
453 }
454 }
455 x[i]=S1;
456 }
457 }
458 } else if (n_block==2) {
459 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
460 for (i = 0; i < n; ++i) {
461 if (ilu->colorOf[i]==color) {
462 /* x_i=x_i-a_ik*x_k */
463 S1=x[2*i];
464 S2=x[2*i+1];
465 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
466 k=ilu->pattern->index[iptr_ik];
467 if (ilu->colorOf[k]>color) {
468 R1=x[2*k];
469 R2=x[2*k+1];
470 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
471 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
472 }
473 }
474 x[2*i]=S1;
475 x[2*i+1]=S2;
476 }
477 }
478 } else if (n_block==3) {
479 #pragma omp for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
480 for (i = 0; i < n; ++i) {
481 if (ilu->colorOf[i]==color) {
482 /* x_i=x_i-a_ik*x_k */
483 S1=x[3*i ];
484 S2=x[3*i+1];
485 S3=x[3*i+2];
486 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
487 k=ilu->pattern->index[iptr_ik];
488 if (ilu->colorOf[k]>color) {
489 R1=x[3*k];
490 R2=x[3*k+1];
491 R3=x[3*k+2];
492 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
493 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
494 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
495 }
496 }
497 x[3*i]=S1;
498 x[3*i+1]=S2;
499 x[3*i+2]=S3;
500 }
501 }
502 }
503 #pragma omp barrier
504 }
505 return;
506 }
507 /*
508 * $Log$
509 * Revision 1.2 2005/09/15 03:44:40 jgs
510 * Merge of development branch dev-02 back to main trunk on 2005-09-15
511 *
512 * Revision 1.1.2.1 2005/09/05 06:29:50 gross
513 * These files have been extracted from finley to define a stand alone libray for iterative
514 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
515 * has not been tested yet.
516 *
517 *
518 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26