/[escript]/trunk/paso/src/ILU.c
ViewVC logotype

Contents of /trunk/paso/src/ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (show annotations)
Fri Jul 11 13:12:46 2008 UTC (11 years, 8 months ago) by phornby
Original Path: trunk/paso/src/Solver_ILU.c
File MIME type: text/plain
File size: 25766 byte(s)

Merge in /branches/windows_from_1456_trunk_1620_merged_in branch.

You will find a preserved pre-merge trunk in tags under tags/trunk_at_1625.
That will be useful for diffing & checking on my stupidity.

Here is a list of the conflicts and their resolution at this
point in time.


=================================================================================
(LLWS == looks like white space).

finley/src/Assemble_addToSystemMatrix.c - resolve to branch - unused var. may be wrong.....
finley/src/CPPAdapter/SystemMatrixAdapter.cpp - resolve to branch - LLWS
finley/src/CPPAdapter/MeshAdapter.cpp - resolve to branch - LLWS
paso/src/PCG.c - resolve to branch - unused var fixes.
paso/src/SolverFCT.c - resolve to branch - LLWS
paso/src/FGMRES.c - resolve to branch - LLWS
paso/src/Common.h - resolve to trunk version. It's omp.h's include... not sure it's needed,
but for the sake of saftey.....
paso/src/Functions.c - resolve to branch version, indentation/tab removal and return error
on bad unimplemented Paso_FunctionCall.
paso/src/SolverFCT_solve.c - resolve to branch version, unused vars
paso/src/SparseMatrix_MatrixVector.c - resolve to branch version, unused vars.
escript/src/Utils.cpp - resloved to branch, needs WinSock2.h
escript/src/DataExpanded.cpp - resolved to branch version - LLWS
escript/src/DataFactory.cpp - resolve to branch version
=================================================================================

This currently passes tests on linux (debian), but is not checked on windows or Altix yet.

This checkin is to make a trunk I can check out for windows to do tests on it.

Known outstanding problem is in the operator=() method of exceptions
causing warning messages on the intel compilers.

May the God of doughnuts have mercy on my soul.


1
2 /* $Id$ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /**************************************************************/
17
18 /* Paso: ILU preconditioner with reordering */
19
20 /**************************************************************/
21
22 /* Copyrights by ACcESS Australia 2003,2004,2005 */
23 /* Author: gross@access.edu.au */
24
25 /**************************************************************/
26
27 #include "Paso.h"
28 #include "Solver.h"
29 #include "PasoUtil.h"
30
31 /**************************************************************/
32
33 /* free all memory used by ILU */
34
35 void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
36 if (in!=NULL) {
37 MEMFREE(in->colorOf);
38 MEMFREE(in->factors);
39 MEMFREE(in->main_iptr);
40 Paso_Pattern_free(in->pattern);
41 MEMFREE(in);
42 }
43 }
44
45 /**************************************************************/
46
47 /* constructs the incomplete block factorization of
48
49 */
50 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SparseMatrix * A,bool_t verbose) {
51 dim_t n=A->numRows;
52 dim_t n_block=A->row_block_size;
53 index_t num_colors=0, *mis_marker=NULL;
54 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
55 register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
56 register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
57 double time0,time_color,time_fac;
58 /* allocations: */
59 Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
60 if (Paso_checkPtr(out)) return NULL;
61 out->colorOf=MEMALLOC(n,index_t);
62 out->factors=MEMALLOC(A->len,double);
63 out->main_iptr=MEMALLOC(n,index_t);
64 out->pattern=Paso_Pattern_getReference(A->pattern);
65 out->n_block=n_block;
66 out->n=n;
67 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
68 time0=Paso_timer();
69 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
70 time_color=Paso_timer()-time0;
71
72 if (Paso_noError()) {
73 time0=Paso_timer();
74 /* find main diagonal and copy matrix values */
75 #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
76 for (i = 0; i < n; ++i) {
77 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
78 iptr_main=A->pattern->ptr[0]-1;
79 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
80 if (A->pattern->index[iptr]==i) iptr_main=iptr;
81 for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
82 }
83 out->main_iptr[i]=iptr_main;
84 if (iptr_main==A->pattern->ptr[0]-1)
85 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
86 }
87 }
88 /* start factorization */
89
90 #pragma omp barrier
91 for (color=0;color<out->num_colors && Paso_noError();++color) {
92 if (n_block==1) {
93 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
94 for (i = 0; i < n; ++i) {
95 if (out->colorOf[i]==color) {
96 for (color2=0;color2<color;++color2) {
97 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
98 k=A->pattern->index[iptr_ik];
99 if (out->colorOf[k]==color2) {
100 A11=out->factors[iptr_ik];
101 /* a_ij=a_ij-a_ik*a_kj */
102 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
103 j=A->pattern->index[iptr_kj];
104 if (out->colorOf[j]>color2) {
105 S11=out->factors[iptr_kj];
106 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
107 if (j==A->pattern->index[iptr_ij]) {
108 out->factors[iptr_ij]-=A11*S11;
109 break;
110 }
111 }
112 }
113 }
114 }
115 }
116 }
117 iptr_main=out->main_iptr[i];
118 D=out->factors[iptr_main];
119 if (ABS(D)>0.) {
120 D=1./D;
121 out->factors[iptr_main]=D;
122 /* a_ik=a_ii^{-1}*a_ik */
123 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
124 k=A->pattern->index[iptr_ik];
125 if (out->colorOf[k]>color) {
126 A11=out->factors[iptr_ik];
127 out->factors[iptr_ik]=A11*D;
128 }
129 }
130 } else {
131 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
132 }
133 }
134 }
135 } else if (n_block==2) {
136 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
137 for (i = 0; i < n; ++i) {
138 if (out->colorOf[i]==color) {
139 for (color2=0;color2<color;++color2) {
140 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
141 k=A->pattern->index[iptr_ik];
142 if (out->colorOf[k]==color2) {
143 A11=out->factors[iptr_ik*4 ];
144 A21=out->factors[iptr_ik*4+1];
145 A12=out->factors[iptr_ik*4+2];
146 A22=out->factors[iptr_ik*4+3];
147 /* a_ij=a_ij-a_ik*a_kj */
148 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
149 j=A->pattern->index[iptr_kj];
150 if (out->colorOf[j]>color2) {
151 S11=out->factors[iptr_kj*4];
152 S21=out->factors[iptr_kj*4+1];
153 S12=out->factors[iptr_kj*4+2];
154 S22=out->factors[iptr_kj*4+3];
155 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
156 if (j==A->pattern->index[iptr_ij]) {
157 out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
158 out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
159 out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
160 out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
161 break;
162 }
163 }
164 }
165 }
166 }
167 }
168 }
169 iptr_main=out->main_iptr[i];
170 A11=out->factors[iptr_main*4];
171 A21=out->factors[iptr_main*4+1];
172 A12=out->factors[iptr_main*4+2];
173 A22=out->factors[iptr_main*4+3];
174 D = A11*A22-A12*A21;
175 if (ABS(D)>0.) {
176 D=1./D;
177 S11= A22*D;
178 S21=-A21*D;
179 S12=-A12*D;
180 S22= A11*D;
181 out->factors[iptr_main*4] = S11;
182 out->factors[iptr_main*4+1]= S21;
183 out->factors[iptr_main*4+2]= S12;
184 out->factors[iptr_main*4+3]= S22;
185 /* a_ik=a_ii^{-1}*a_ik */
186 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
187 k=A->pattern->index[iptr_ik];
188 if (out->colorOf[k]>color) {
189 A11=out->factors[iptr_ik*4 ];
190 A21=out->factors[iptr_ik*4+1];
191 A12=out->factors[iptr_ik*4+2];
192 A22=out->factors[iptr_ik*4+3];
193 out->factors[4*iptr_ik ]=S11*A11+S12*A21;
194 out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
195 out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
196 out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
197 }
198 }
199 } else {
200 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
201 }
202 }
203 }
204 } else if (n_block==3) {
205 #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
206 for (i = 0; i < n; ++i) {
207 if (out->colorOf[i]==color) {
208 for (color2=0;color2<color;++color2) {
209 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
210 k=A->pattern->index[iptr_ik];
211 if (out->colorOf[k]==color2) {
212 A11=out->factors[iptr_ik*9 ];
213 A21=out->factors[iptr_ik*9+1];
214 A31=out->factors[iptr_ik*9+2];
215 A12=out->factors[iptr_ik*9+3];
216 A22=out->factors[iptr_ik*9+4];
217 A32=out->factors[iptr_ik*9+5];
218 A13=out->factors[iptr_ik*9+6];
219 A23=out->factors[iptr_ik*9+7];
220 A33=out->factors[iptr_ik*9+8];
221 /* a_ij=a_ij-a_ik*a_kj */
222 for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
223 j=A->pattern->index[iptr_kj];
224 if (out->colorOf[j]>color2) {
225 S11=out->factors[iptr_kj*9 ];
226 S21=out->factors[iptr_kj*9+1];
227 S31=out->factors[iptr_kj*9+2];
228 S12=out->factors[iptr_kj*9+3];
229 S22=out->factors[iptr_kj*9+4];
230 S32=out->factors[iptr_kj*9+5];
231 S13=out->factors[iptr_kj*9+6];
232 S23=out->factors[iptr_kj*9+7];
233 S33=out->factors[iptr_kj*9+8];
234 for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
235 if (j==A->pattern->index[iptr_ij]) {
236 out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
237 out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
238 out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
239 out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
240 out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
241 out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
242 out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
243 out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
244 out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
245 break;
246 }
247 }
248 }
249 }
250 }
251 }
252 }
253 iptr_main=out->main_iptr[i];
254 A11=out->factors[iptr_main*9 ];
255 A21=out->factors[iptr_main*9+1];
256 A31=out->factors[iptr_main*9+2];
257 A12=out->factors[iptr_main*9+3];
258 A22=out->factors[iptr_main*9+4];
259 A32=out->factors[iptr_main*9+5];
260 A13=out->factors[iptr_main*9+6];
261 A23=out->factors[iptr_main*9+7];
262 A33=out->factors[iptr_main*9+8];
263 D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
264 if (ABS(D)>0.) {
265 D=1./D;
266 S11=(A22*A33-A23*A32)*D;
267 S21=(A31*A23-A21*A33)*D;
268 S31=(A21*A32-A31*A22)*D;
269 S12=(A13*A32-A12*A33)*D;
270 S22=(A11*A33-A31*A13)*D;
271 S32=(A12*A31-A11*A32)*D;
272 S13=(A12*A23-A13*A22)*D;
273 S23=(A13*A21-A11*A23)*D;
274 S33=(A11*A22-A12*A21)*D;
275
276 out->factors[iptr_main*9 ]=S11;
277 out->factors[iptr_main*9+1]=S21;
278 out->factors[iptr_main*9+2]=S31;
279 out->factors[iptr_main*9+3]=S12;
280 out->factors[iptr_main*9+4]=S22;
281 out->factors[iptr_main*9+5]=S32;
282 out->factors[iptr_main*9+6]=S13;
283 out->factors[iptr_main*9+7]=S23;
284 out->factors[iptr_main*9+8]=S33;
285
286 /* a_ik=a_ii^{-1}*a_ik */
287 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
288 k=A->pattern->index[iptr_ik];
289 if (out->colorOf[k]>color) {
290 A11=out->factors[iptr_ik*9 ];
291 A21=out->factors[iptr_ik*9+1];
292 A31=out->factors[iptr_ik*9+2];
293 A12=out->factors[iptr_ik*9+3];
294 A22=out->factors[iptr_ik*9+4];
295 A32=out->factors[iptr_ik*9+5];
296 A13=out->factors[iptr_ik*9+6];
297 A23=out->factors[iptr_ik*9+7];
298 A33=out->factors[iptr_ik*9+8];
299 out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
300 out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
301 out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
302 out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
303 out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
304 out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
305 out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
306 out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
307 out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
308 }
309 }
310 } else {
311 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
312 }
313 }
314 }
315 } else {
316 Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
317 }
318 #pragma omp barrier
319 }
320 time_fac=Paso_timer()-time0;
321 }
322 }
323 if (Paso_noError()) {
324 if (verbose) {
325 printf("ILU: %d color used \n",out->num_colors);
326 printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
327 }
328 return out;
329 } else {
330 Paso_Solver_ILU_free(out);
331 return NULL;
332 }
333 }
334
335 /**************************************************************/
336
337 /* apply ILU precondition b-> x
338
339 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
340
341 should be called within a parallel region
342 barrier synconization should be performed to make sure that the input vector available
343
344 */
345
346 void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
347 register dim_t i,k;
348 register index_t color,iptr_ik,iptr_main;
349 register double S1,S2,S3,R1,R2,R3;
350 dim_t n_block=ilu->n_block;
351 dim_t n=ilu->n;
352
353
354 /* copy x into b*/
355 #pragma omp parallel for private(i) schedule(static)
356 for (i=0;i<n*n_block;++i) x[i]=b[i];
357 /* forward substitution */
358 for (color=0;color<ilu->num_colors;++color) {
359 if (n_block==1) {
360 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
361 for (i = 0; i < n; ++i) {
362 if (ilu->colorOf[i]==color) {
363 /* x_i=x_i-a_ik*x_k */
364 S1=x[i];
365 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
366 k=ilu->pattern->index[iptr_ik];
367 if (ilu->colorOf[k]<color) {
368 R1=x[k];
369 S1-=ilu->factors[iptr_ik]*R1;
370 }
371 }
372 iptr_main=ilu->main_iptr[i];
373 x[i]=ilu->factors[iptr_main]*S1;
374 }
375 }
376 } else if (n_block==2) {
377 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
378 for (i = 0; i < n; ++i) {
379 if (ilu->colorOf[i]==color) {
380 /* x_i=x_i-a_ik*x_k */
381 S1=x[2*i];
382 S2=x[2*i+1];
383 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
384 k=ilu->pattern->index[iptr_ik];
385 if (ilu->colorOf[k]<color) {
386 R1=x[2*k];
387 R2=x[2*k+1];
388 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
389 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
390 }
391 }
392 iptr_main=ilu->main_iptr[i];
393 x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
394 x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
395 }
396
397 }
398 } else if (n_block==3) {
399 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
400 for (i = 0; i < n; ++i) {
401 if (ilu->colorOf[i]==color) {
402 /* x_i=x_i-a_ik*x_k */
403 S1=x[3*i];
404 S2=x[3*i+1];
405 S3=x[3*i+2];
406 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
407 k=ilu->pattern->index[iptr_ik];
408 if (ilu->colorOf[k]<color) {
409 R1=x[3*k];
410 R2=x[3*k+1];
411 R3=x[3*k+2];
412 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
413 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
414 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
415 }
416 }
417 iptr_main=ilu->main_iptr[i];
418 x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
419 x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
420 x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
421 }
422 }
423 }
424 }
425 /* backward substitution */
426 for (color=(ilu->num_colors)-1;color>-1;--color) {
427 if (n_block==1) {
428 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
429 for (i = 0; i < n; ++i) {
430 if (ilu->colorOf[i]==color) {
431 /* x_i=x_i-a_ik*x_k */
432 S1=x[i];
433 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
434 k=ilu->pattern->index[iptr_ik];
435 if (ilu->colorOf[k]>color) {
436 R1=x[k];
437 S1-=ilu->factors[iptr_ik]*R1;
438 }
439 }
440 x[i]=S1;
441 }
442 }
443 } else if (n_block==2) {
444 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
445 for (i = 0; i < n; ++i) {
446 if (ilu->colorOf[i]==color) {
447 /* x_i=x_i-a_ik*x_k */
448 S1=x[2*i];
449 S2=x[2*i+1];
450 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
451 k=ilu->pattern->index[iptr_ik];
452 if (ilu->colorOf[k]>color) {
453 R1=x[2*k];
454 R2=x[2*k+1];
455 S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
456 S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
457 }
458 }
459 x[2*i]=S1;
460 x[2*i+1]=S2;
461 }
462 }
463 } else if (n_block==3) {
464 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
465 for (i = 0; i < n; ++i) {
466 if (ilu->colorOf[i]==color) {
467 /* x_i=x_i-a_ik*x_k */
468 S1=x[3*i ];
469 S2=x[3*i+1];
470 S3=x[3*i+2];
471 for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
472 k=ilu->pattern->index[iptr_ik];
473 if (ilu->colorOf[k]>color) {
474 R1=x[3*k];
475 R2=x[3*k+1];
476 R3=x[3*k+2];
477 S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
478 S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
479 S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
480 }
481 }
482 x[3*i]=S1;
483 x[3*i+1]=S2;
484 x[3*i+2]=S3;
485 }
486 }
487 }
488 #pragma omp barrier
489 }
490 return;
491 }
492 /*
493 * $Log$
494 * Revision 1.2 2005/09/15 03:44:40 jgs
495 * Merge of development branch dev-02 back to main trunk on 2005-09-15
496 *
497 * Revision 1.1.2.1 2005/09/05 06:29:50 gross
498 * These files have been extracted from finley to define a stand alone libray for iterative
499 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
500 * has not been tested yet.
501 *
502 *
503 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26