/[escript]/trunk/paso/src/Solver_ILU.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver_ILU.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (hide annotations)
Fri Jul 11 13:12:46 2008 UTC (11 years, 10 months ago) by phornby
File MIME type: text/plain
File size: 25766 byte(s)

Merge in /branches/windows_from_1456_trunk_1620_merged_in branch.

You will find a preserved pre-merge trunk in tags under tags/trunk_at_1625.
That will be useful for diffing & checking on my stupidity.

Here is a list of the conflicts and their resolution at this
point in time.


=================================================================================
(LLWS == looks like white space).

finley/src/Assemble_addToSystemMatrix.c - resolve to branch - unused var. may be wrong.....
finley/src/CPPAdapter/SystemMatrixAdapter.cpp - resolve to branch - LLWS
finley/src/CPPAdapter/MeshAdapter.cpp - resolve to branch - LLWS
paso/src/PCG.c - resolve to branch - unused var fixes.
paso/src/SolverFCT.c - resolve to branch - LLWS
paso/src/FGMRES.c - resolve to branch - LLWS
paso/src/Common.h - resolve to trunk version. It's omp.h's include... not sure it's needed,
but for the sake of saftey.....
paso/src/Functions.c - resolve to branch version, indentation/tab removal and return error
on bad unimplemented Paso_FunctionCall.
paso/src/SolverFCT_solve.c - resolve to branch version, unused vars
paso/src/SparseMatrix_MatrixVector.c - resolve to branch version, unused vars.
escript/src/Utils.cpp - resloved to branch, needs WinSock2.h
escript/src/DataExpanded.cpp - resolved to branch version - LLWS
escript/src/DataFactory.cpp - resolve to branch version
=================================================================================

This currently passes tests on linux (debian), but is not checked on windows or Altix yet.

This checkin is to make a trunk I can check out for windows to do tests on it.

Known outstanding problem is in the operator=() method of exceptions
causing warning messages on the intel compilers.

May the God of doughnuts have mercy on my soul.


1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16 jgs 150 /**************************************************************/
17    
18     /* Paso: ILU preconditioner with reordering */
19    
20     /**************************************************************/
21    
22     /* Copyrights by ACcESS Australia 2003,2004,2005 */
23     /* Author: gross@access.edu.au */
24    
25     /**************************************************************/
26    
27 gross 700 #include "Paso.h"
28 jgs 150 #include "Solver.h"
29 gross 700 #include "PasoUtil.h"
30 jgs 150
31     /**************************************************************/
32    
33     /* free all memory used by ILU */
34    
35     void Paso_Solver_ILU_free(Paso_Solver_ILU * in) {
36     if (in!=NULL) {
37 gross 431 MEMFREE(in->colorOf);
38     MEMFREE(in->factors);
39     MEMFREE(in->main_iptr);
40 ksteube 1312 Paso_Pattern_free(in->pattern);
41 jgs 150 MEMFREE(in);
42     }
43     }
44    
45     /**************************************************************/
46    
47 gross 431 /* constructs the incomplete block factorization of
48 jgs 150
49     */
50 ksteube 1312 Paso_Solver_ILU* Paso_Solver_getILU(Paso_SparseMatrix * A,bool_t verbose) {
51     dim_t n=A->numRows;
52 gross 431 dim_t n_block=A->row_block_size;
53 gross 1028 index_t num_colors=0, *mis_marker=NULL;
54 gross 431 register double A11,A12,A13,A21,A22,A23,A31,A32,A33,D;
55     register double S11,S12,S13,S21,S22,S23,S31,S32,S33;
56     register index_t i,iptr_main,iptr,iptr_ik,k,iptr_kj,j,iptr_ij,color,color2;
57     double time0,time_color,time_fac;
58     /* allocations: */
59     Paso_Solver_ILU* out=MEMALLOC(1,Paso_Solver_ILU);
60     if (Paso_checkPtr(out)) return NULL;
61     out->colorOf=MEMALLOC(n,index_t);
62     out->factors=MEMALLOC(A->len,double);
63 gross 432 out->main_iptr=MEMALLOC(n,index_t);
64 ksteube 1312 out->pattern=Paso_Pattern_getReference(A->pattern);
65 gross 431 out->n_block=n_block;
66     out->n=n;
67 gross 1361 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
68     time0=Paso_timer();
69     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
70 gross 431 time_color=Paso_timer()-time0;
71 jgs 150
72 gross 1361 if (Paso_noError()) {
73     time0=Paso_timer();
74     /* find main diagonal and copy matrix values */
75     #pragma omp parallel for schedule(static) private(i,iptr,iptr_main,k)
76     for (i = 0; i < n; ++i) {
77     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
78     iptr_main=A->pattern->ptr[0]-1;
79     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; iptr++) {
80     if (A->pattern->index[iptr]==i) iptr_main=iptr;
81     for (k=0;k<n_block*n_block;++k) out->factors[n_block*n_block*iptr+k]=A->val[n_block*n_block*iptr+k];
82     }
83     out->main_iptr[i]=iptr_main;
84     if (iptr_main==A->pattern->ptr[0]-1)
85     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: no main diagonal");
86     }
87     }
88     /* start factorization */
89    
90     #pragma omp barrier
91     for (color=0;color<out->num_colors && Paso_noError();++color) {
92     if (n_block==1) {
93     #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,j,iptr_ij,A11,iptr_main,D)
94     for (i = 0; i < n; ++i) {
95     if (out->colorOf[i]==color) {
96     for (color2=0;color2<color;++color2) {
97     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
98     k=A->pattern->index[iptr_ik];
99     if (out->colorOf[k]==color2) {
100     A11=out->factors[iptr_ik];
101     /* a_ij=a_ij-a_ik*a_kj */
102     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
103     j=A->pattern->index[iptr_kj];
104     if (out->colorOf[j]>color2) {
105     S11=out->factors[iptr_kj];
106     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
107     if (j==A->pattern->index[iptr_ij]) {
108     out->factors[iptr_ij]-=A11*S11;
109     break;
110     }
111 gross 431 }
112     }
113     }
114     }
115     }
116     }
117 gross 1361 iptr_main=out->main_iptr[i];
118     D=out->factors[iptr_main];
119     if (ABS(D)>0.) {
120     D=1./D;
121     out->factors[iptr_main]=D;
122     /* a_ik=a_ii^{-1}*a_ik */
123     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
124     k=A->pattern->index[iptr_ik];
125     if (out->colorOf[k]>color) {
126     A11=out->factors[iptr_ik];
127     out->factors[iptr_ik]=A11*D;
128     }
129     }
130     } else {
131     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
132 gross 431 }
133     }
134 jgs 150 }
135 gross 1361 } else if (n_block==2) {
136     #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S12,S22,j,iptr_ij,A11,A21,A12,A22,iptr_main,D)
137     for (i = 0; i < n; ++i) {
138     if (out->colorOf[i]==color) {
139     for (color2=0;color2<color;++color2) {
140     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
141     k=A->pattern->index[iptr_ik];
142     if (out->colorOf[k]==color2) {
143     A11=out->factors[iptr_ik*4 ];
144     A21=out->factors[iptr_ik*4+1];
145     A12=out->factors[iptr_ik*4+2];
146     A22=out->factors[iptr_ik*4+3];
147     /* a_ij=a_ij-a_ik*a_kj */
148     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
149     j=A->pattern->index[iptr_kj];
150     if (out->colorOf[j]>color2) {
151     S11=out->factors[iptr_kj*4];
152     S21=out->factors[iptr_kj*4+1];
153     S12=out->factors[iptr_kj*4+2];
154     S22=out->factors[iptr_kj*4+3];
155     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
156     if (j==A->pattern->index[iptr_ij]) {
157     out->factors[4*iptr_ij ]-=A11*S11+A12*S21;
158     out->factors[4*iptr_ij+1]-=A21*S11+A22*S21;
159     out->factors[4*iptr_ij+2]-=A11*S12+A12*S22;
160     out->factors[4*iptr_ij+3]-=A21*S12+A22*S22;
161     break;
162     }
163 gross 431 }
164     }
165     }
166     }
167     }
168 jgs 150 }
169 gross 1361 iptr_main=out->main_iptr[i];
170     A11=out->factors[iptr_main*4];
171     A21=out->factors[iptr_main*4+1];
172     A12=out->factors[iptr_main*4+2];
173     A22=out->factors[iptr_main*4+3];
174     D = A11*A22-A12*A21;
175     if (ABS(D)>0.) {
176     D=1./D;
177     S11= A22*D;
178     S21=-A21*D;
179     S12=-A12*D;
180     S22= A11*D;
181     out->factors[iptr_main*4] = S11;
182     out->factors[iptr_main*4+1]= S21;
183     out->factors[iptr_main*4+2]= S12;
184     out->factors[iptr_main*4+3]= S22;
185     /* a_ik=a_ii^{-1}*a_ik */
186     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
187     k=A->pattern->index[iptr_ik];
188     if (out->colorOf[k]>color) {
189     A11=out->factors[iptr_ik*4 ];
190     A21=out->factors[iptr_ik*4+1];
191     A12=out->factors[iptr_ik*4+2];
192     A22=out->factors[iptr_ik*4+3];
193     out->factors[4*iptr_ik ]=S11*A11+S12*A21;
194     out->factors[4*iptr_ik+1]=S21*A11+S22*A21;
195     out->factors[4*iptr_ik+2]=S11*A12+S12*A22;
196     out->factors[4*iptr_ik+3]=S21*A12+S22*A22;
197     }
198     }
199     } else {
200     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
201 jgs 150 }
202 gross 431 }
203     }
204 gross 1361 } else if (n_block==3) {
205     #pragma omp parallel for schedule(static) private(i,color2,iptr_ik,k,iptr_kj,S11,S21,S31,S12,S22,S32,S13,S23,S33,j,iptr_ij,A11,A21,A31,A12,A22,A32,A13,A23,A33,iptr_main,D)
206     for (i = 0; i < n; ++i) {
207     if (out->colorOf[i]==color) {
208     for (color2=0;color2<color;++color2) {
209     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
210     k=A->pattern->index[iptr_ik];
211     if (out->colorOf[k]==color2) {
212     A11=out->factors[iptr_ik*9 ];
213     A21=out->factors[iptr_ik*9+1];
214     A31=out->factors[iptr_ik*9+2];
215     A12=out->factors[iptr_ik*9+3];
216     A22=out->factors[iptr_ik*9+4];
217     A32=out->factors[iptr_ik*9+5];
218     A13=out->factors[iptr_ik*9+6];
219     A23=out->factors[iptr_ik*9+7];
220     A33=out->factors[iptr_ik*9+8];
221     /* a_ij=a_ij-a_ik*a_kj */
222     for (iptr_kj=A->pattern->ptr[k];iptr_kj<A->pattern->ptr[k+1]; iptr_kj++) {
223     j=A->pattern->index[iptr_kj];
224     if (out->colorOf[j]>color2) {
225     S11=out->factors[iptr_kj*9 ];
226     S21=out->factors[iptr_kj*9+1];
227     S31=out->factors[iptr_kj*9+2];
228     S12=out->factors[iptr_kj*9+3];
229     S22=out->factors[iptr_kj*9+4];
230     S32=out->factors[iptr_kj*9+5];
231     S13=out->factors[iptr_kj*9+6];
232     S23=out->factors[iptr_kj*9+7];
233     S33=out->factors[iptr_kj*9+8];
234     for (iptr_ij=A->pattern->ptr[i];iptr_ij<A->pattern->ptr[i+1]; iptr_ij++) {
235     if (j==A->pattern->index[iptr_ij]) {
236     out->factors[iptr_ij*9 ]-=A11*S11+A12*S21+A13*S31;
237     out->factors[iptr_ij*9+1]-=A21*S11+A22*S21+A23*S31;
238     out->factors[iptr_ij*9+2]-=A31*S11+A32*S21+A33*S31;
239     out->factors[iptr_ij*9+3]-=A11*S12+A12*S22+A13*S32;
240     out->factors[iptr_ij*9+4]-=A21*S12+A22*S22+A23*S32;
241     out->factors[iptr_ij*9+5]-=A31*S12+A32*S22+A33*S32;
242     out->factors[iptr_ij*9+6]-=A11*S13+A12*S23+A13*S33;
243     out->factors[iptr_ij*9+7]-=A21*S13+A22*S23+A23*S33;
244     out->factors[iptr_ij*9+8]-=A31*S13+A32*S23+A33*S33;
245     break;
246     }
247 gross 431 }
248     }
249     }
250 jgs 150 }
251     }
252 gross 431 }
253 gross 1361 iptr_main=out->main_iptr[i];
254     A11=out->factors[iptr_main*9 ];
255     A21=out->factors[iptr_main*9+1];
256     A31=out->factors[iptr_main*9+2];
257     A12=out->factors[iptr_main*9+3];
258     A22=out->factors[iptr_main*9+4];
259     A32=out->factors[iptr_main*9+5];
260     A13=out->factors[iptr_main*9+6];
261     A23=out->factors[iptr_main*9+7];
262     A33=out->factors[iptr_main*9+8];
263     D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);
264     if (ABS(D)>0.) {
265     D=1./D;
266     S11=(A22*A33-A23*A32)*D;
267     S21=(A31*A23-A21*A33)*D;
268     S31=(A21*A32-A31*A22)*D;
269     S12=(A13*A32-A12*A33)*D;
270     S22=(A11*A33-A31*A13)*D;
271     S32=(A12*A31-A11*A32)*D;
272     S13=(A12*A23-A13*A22)*D;
273     S23=(A13*A21-A11*A23)*D;
274     S33=(A11*A22-A12*A21)*D;
275    
276     out->factors[iptr_main*9 ]=S11;
277     out->factors[iptr_main*9+1]=S21;
278     out->factors[iptr_main*9+2]=S31;
279     out->factors[iptr_main*9+3]=S12;
280     out->factors[iptr_main*9+4]=S22;
281     out->factors[iptr_main*9+5]=S32;
282     out->factors[iptr_main*9+6]=S13;
283     out->factors[iptr_main*9+7]=S23;
284     out->factors[iptr_main*9+8]=S33;
285    
286     /* a_ik=a_ii^{-1}*a_ik */
287     for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
288     k=A->pattern->index[iptr_ik];
289     if (out->colorOf[k]>color) {
290     A11=out->factors[iptr_ik*9 ];
291     A21=out->factors[iptr_ik*9+1];
292     A31=out->factors[iptr_ik*9+2];
293     A12=out->factors[iptr_ik*9+3];
294     A22=out->factors[iptr_ik*9+4];
295     A32=out->factors[iptr_ik*9+5];
296     A13=out->factors[iptr_ik*9+6];
297     A23=out->factors[iptr_ik*9+7];
298     A33=out->factors[iptr_ik*9+8];
299     out->factors[iptr_ik*9 ]=S11*A11+S12*A21+S13*A31;
300     out->factors[iptr_ik*9+1]=S21*A11+S22*A21+S23*A31;
301     out->factors[iptr_ik*9+2]=S31*A11+S32*A21+S33*A31;
302     out->factors[iptr_ik*9+3]=S11*A12+S12*A22+S13*A32;
303     out->factors[iptr_ik*9+4]=S21*A12+S22*A22+S23*A32;
304     out->factors[iptr_ik*9+5]=S31*A12+S32*A22+S33*A32;
305     out->factors[iptr_ik*9+6]=S11*A13+S12*A23+S13*A33;
306     out->factors[iptr_ik*9+7]=S21*A13+S22*A23+S23*A33;
307     out->factors[iptr_ik*9+8]=S31*A13+S32*A23+S33*A33;
308     }
309     }
310     } else {
311     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getILU: non-regular main diagonal block.");
312 gross 431 }
313     }
314 jgs 150 }
315 gross 1361 } else {
316     Paso_setError(VALUE_ERROR, "Paso_Solver_getILU: block size greater than 3 is not supported.");
317     }
318     #pragma omp barrier
319     }
320     time_fac=Paso_timer()-time0;
321     }
322 jgs 150 }
323     if (Paso_noError()) {
324     if (verbose) {
325 gross 431 printf("ILU: %d color used \n",out->num_colors);
326     printf("timing: ILU: coloring/elemination : %e/%e\n",time_color,time_fac);
327 jgs 150 }
328     return out;
329     } else {
330     Paso_Solver_ILU_free(out);
331     return NULL;
332     }
333     }
334    
335     /**************************************************************/
336    
337     /* apply ILU precondition b-> x
338    
339 gross 431 in fact it solves LUx=b in the form x= U^{-1} L^{-1}b
340 jgs 150
341     should be called within a parallel region
342     barrier synconization should be performed to make sure that the input vector available
343    
344     */
345    
346     void Paso_Solver_solveILU(Paso_Solver_ILU * ilu, double * x, double * b) {
347 gross 431 register dim_t i,k;
348     register index_t color,iptr_ik,iptr_main;
349     register double S1,S2,S3,R1,R2,R3;
350 jgs 150 dim_t n_block=ilu->n_block;
351 gross 431 dim_t n=ilu->n;
352 jgs 150
353 gross 431
354     /* copy x into b*/
355 gross 1556 #pragma omp parallel for private(i) schedule(static)
356 gross 431 for (i=0;i<n*n_block;++i) x[i]=b[i];
357     /* forward substitution */
358     for (color=0;color<ilu->num_colors;++color) {
359     if (n_block==1) {
360 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1,iptr_main)
361 gross 431 for (i = 0; i < n; ++i) {
362     if (ilu->colorOf[i]==color) {
363     /* x_i=x_i-a_ik*x_k */
364     S1=x[i];
365     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
366     k=ilu->pattern->index[iptr_ik];
367     if (ilu->colorOf[k]<color) {
368     R1=x[k];
369     S1-=ilu->factors[iptr_ik]*R1;
370     }
371     }
372     iptr_main=ilu->main_iptr[i];
373     x[i]=ilu->factors[iptr_main]*S1;
374     }
375 jgs 150 }
376 gross 431 } else if (n_block==2) {
377 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,iptr_main,S1,S2,R1,R2)
378 gross 431 for (i = 0; i < n; ++i) {
379     if (ilu->colorOf[i]==color) {
380     /* x_i=x_i-a_ik*x_k */
381     S1=x[2*i];
382     S2=x[2*i+1];
383     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
384     k=ilu->pattern->index[iptr_ik];
385     if (ilu->colorOf[k]<color) {
386     R1=x[2*k];
387     R2=x[2*k+1];
388     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
389     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
390     }
391     }
392     iptr_main=ilu->main_iptr[i];
393     x[2*i ]=ilu->factors[4*iptr_main ]*S1+ilu->factors[4*iptr_main+2]*S2;
394     x[2*i+1]=ilu->factors[4*iptr_main+1]*S1+ilu->factors[4*iptr_main+3]*S2;
395     }
396    
397     }
398     } else if (n_block==3) {
399 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,iptr_main,k,S1,S2,S3,R1,R2,R3)
400 gross 431 for (i = 0; i < n; ++i) {
401     if (ilu->colorOf[i]==color) {
402     /* x_i=x_i-a_ik*x_k */
403     S1=x[3*i];
404     S2=x[3*i+1];
405     S3=x[3*i+2];
406     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
407     k=ilu->pattern->index[iptr_ik];
408     if (ilu->colorOf[k]<color) {
409     R1=x[3*k];
410     R2=x[3*k+1];
411     R3=x[3*k+2];
412     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
413     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
414     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
415     }
416     }
417     iptr_main=ilu->main_iptr[i];
418     x[3*i ]=ilu->factors[9*iptr_main ]*S1+ilu->factors[9*iptr_main+3]*S2+ilu->factors[9*iptr_main+6]*S3;
419     x[3*i+1]=ilu->factors[9*iptr_main+1]*S1+ilu->factors[9*iptr_main+4]*S2+ilu->factors[9*iptr_main+7]*S3;
420     x[3*i+2]=ilu->factors[9*iptr_main+2]*S1+ilu->factors[9*iptr_main+5]*S2+ilu->factors[9*iptr_main+8]*S3;
421 jgs 150 }
422 gross 431 }
423 jgs 150 }
424     }
425 gross 431 /* backward substitution */
426     for (color=(ilu->num_colors)-1;color>-1;--color) {
427     if (n_block==1) {
428 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,R1)
429 gross 431 for (i = 0; i < n; ++i) {
430     if (ilu->colorOf[i]==color) {
431     /* x_i=x_i-a_ik*x_k */
432     S1=x[i];
433     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
434     k=ilu->pattern->index[iptr_ik];
435     if (ilu->colorOf[k]>color) {
436     R1=x[k];
437     S1-=ilu->factors[iptr_ik]*R1;
438     }
439     }
440     x[i]=S1;
441     }
442     }
443     } else if (n_block==2) {
444 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,R1,R2)
445 gross 431 for (i = 0; i < n; ++i) {
446     if (ilu->colorOf[i]==color) {
447     /* x_i=x_i-a_ik*x_k */
448     S1=x[2*i];
449     S2=x[2*i+1];
450     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
451     k=ilu->pattern->index[iptr_ik];
452     if (ilu->colorOf[k]>color) {
453     R1=x[2*k];
454     R2=x[2*k+1];
455     S1-=ilu->factors[4*iptr_ik ]*R1+ilu->factors[4*iptr_ik+2]*R2;
456     S2-=ilu->factors[4*iptr_ik+1]*R1+ilu->factors[4*iptr_ik+3]*R2;
457     }
458     }
459     x[2*i]=S1;
460     x[2*i+1]=S2;
461     }
462     }
463     } else if (n_block==3) {
464 gross 1556 #pragma omp parallel for schedule(static) private(i,iptr_ik,k,S1,S2,S3,R1,R2,R3)
465 gross 431 for (i = 0; i < n; ++i) {
466     if (ilu->colorOf[i]==color) {
467     /* x_i=x_i-a_ik*x_k */
468     S1=x[3*i ];
469     S2=x[3*i+1];
470     S3=x[3*i+2];
471     for (iptr_ik=ilu->pattern->ptr[i];iptr_ik<ilu->pattern->ptr[i+1]; ++iptr_ik) {
472     k=ilu->pattern->index[iptr_ik];
473     if (ilu->colorOf[k]>color) {
474     R1=x[3*k];
475     R2=x[3*k+1];
476     R3=x[3*k+2];
477     S1-=ilu->factors[9*iptr_ik ]*R1+ilu->factors[9*iptr_ik+3]*R2+ilu->factors[9*iptr_ik+6]*R3;
478     S2-=ilu->factors[9*iptr_ik+1]*R1+ilu->factors[9*iptr_ik+4]*R2+ilu->factors[9*iptr_ik+7]*R3;
479     S3-=ilu->factors[9*iptr_ik+2]*R1+ilu->factors[9*iptr_ik+5]*R2+ilu->factors[9*iptr_ik+8]*R3;
480     }
481     }
482     x[3*i]=S1;
483     x[3*i+1]=S2;
484     x[3*i+2]=S3;
485     }
486     }
487     }
488     #pragma omp barrier
489     }
490 jgs 150 return;
491     }
492     /*
493     * $Log$
494     * Revision 1.2 2005/09/15 03:44:40 jgs
495     * Merge of development branch dev-02 back to main trunk on 2005-09-15
496     *
497     * Revision 1.1.2.1 2005/09/05 06:29:50 gross
498     * These files have been extracted from finley to define a stand alone libray for iterative
499     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
500     * has not been tested yet.
501     *
502     *
503     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26