/[escript]/trunk/paso/src/GSMPI.c
ViewVC logotype

Annotation of /trunk/paso/src/GSMPI.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3051 - (hide annotations)
Tue Jun 29 00:45:49 2010 UTC (9 years, 3 months ago) by lgao
Original Path: trunk/paso/src/Solver_GSMPI.c
File MIME type: text/plain
File size: 21965 byte(s)
Hybrid MPI/OpenMP versioned Gauss_Seidel preconditioner is added. 
To use it, use "SolverOptions.GAUSS_SEIDEL_MPI" in python script. 


1 lgao 3051
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003-2010 */
22     /* Author: l.gao@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "SystemMatrix.h"
28     #include "Solver.h"
29     #include "PasoUtil.h"
30    
31     #include <stdio.h>
32    
33     /**************************************************************/
34    
35     /* free all memory used by GS */
36    
37     void Paso_Solver_GSMPI_free(Paso_Solver_GS * in) {
38     if (in!=NULL) {
39     MEMFREE(in->colorOf);
40     Paso_SparseMatrix_free(in->factors);
41     MEMFREE(in->diag);
42     MEMFREE(in->main_iptr);
43     Paso_Pattern_free(in->pattern);
44     MEMFREE(in);
45     }
46     }
47    
48     /**************************************************************/
49    
50     /* gs->diag saves the matrix of D{-1}
51     this is different from Paso_Solver_getGS(), in which, gs->diag
52     is the matrix D.
53     */
54     Paso_Solver_GS* Paso_Solver_getGSMPI(Paso_SparseMatrix * A,bool_t verbose) {
55     dim_t n=A->numRows;
56     dim_t n_block=A->row_block_size;
57     dim_t block_size=A->block_size;
58     register index_t i,iptr_main=0,iPtr;
59     double time0=0,time_color=0,time_fac=0;
60     double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;
61    
62     /* allocations: */
63     /* printf("n_block= %d, n= %d\n", n_block, n); */
64     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
65     if (Paso_checkPtr(out)) return NULL;
66     out->colorOf=MEMALLOC(n,index_t);
67     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
68     /*out->diag=MEMALLOC(A->len,double);*/
69     out->main_iptr=MEMALLOC(n,index_t);
70     out->pattern=Paso_Pattern_getReference(A->pattern);
71     out->factors=Paso_SparseMatrix_getReference(A);
72     out->n_block=n_block;
73     out->n=n;
74    
75     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
76     time0=Paso_timer();
77     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
78     time_color=Paso_timer()-time0;
79    
80     if (Paso_noError()) {
81     time0=Paso_timer();
82    
83     if (! (Paso_checkPtr(out->diag))) {
84     if (n_block==1) {
85     #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
86     for (i = 0; i < A->pattern->numOutput; i++) {
87     iptr_main=0;
88     out->diag[i]=1.;
89     /* find main diagonal */
90     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
91     if (A->pattern->index[iPtr]==i) {
92     iptr_main=iPtr;
93     if (ABS(A->val[iPtr]) > 0.) {
94     out->diag[i]=1./(A->val[iPtr]);
95     } else {
96     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
97     }
98     break;
99     }
100     }
101     out->main_iptr[i]=iptr_main;
102     }
103     } else if (n_block==2) {
104     #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
105     for (i = 0; i < A->pattern->numOutput; i++) {
106     out->diag[i*4+0]= 1.;
107     out->diag[i*4+1]= 0.;
108     out->diag[i*4+2]= 0.;
109     out->diag[i*4+3]= 1.;
110     iptr_main=0;
111     /* find main diagonal */
112     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
113     if (A->pattern->index[iPtr]==i) {
114     iptr_main=iPtr;
115     A11=A->val[iPtr*4];
116     A21=A->val[iPtr*4+1];
117     A12=A->val[iPtr*4+2];
118     A22=A->val[iPtr*4+3];
119     D = A11*A22-A12*A21;
120     if (ABS(D)>0.) {
121     D=1./D;
122     out->diag[i*4 ]= A22*D;
123     out->diag[i*4+1]= -A21*D;
124     out->diag[i*4+2]= -A12*D;
125     out->diag[i*4+3]= A11*D;
126     } else {
127     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
128     }
129     break;
130     }
131     }
132     out->main_iptr[i]=iptr_main;
133     }
134     } else if (n_block==3) {
135     #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
136     for (i = 0; i < A->pattern->numOutput; i++) {
137     out->diag[i*9 ]=1.;
138     out->diag[i*9+1]=0.;
139     out->diag[i*9+2]=0.;
140     out->diag[i*9+3]=0.;
141     out->diag[i*9+4]=1.;
142     out->diag[i*9+5]=0.;
143     out->diag[i*9+6]=0.;
144     out->diag[i*9+7]=0.;
145     out->diag[i*9+8]=1.;
146     iptr_main=0;
147     /* find main diagonal */
148     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
149     if (A->pattern->index[iPtr]==i) {
150     iptr_main=iPtr;
151     A11=A->val[iPtr*9 ];
152     A21=A->val[iPtr*9+1];
153     A31=A->val[iPtr*9+2];
154     A12=A->val[iPtr*9+3];
155     A22=A->val[iPtr*9+4];
156     A32=A->val[iPtr*9+5];
157     A13=A->val[iPtr*9+6];
158     A23=A->val[iPtr*9+7];
159     A33=A->val[iPtr*9+8];
160     D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
161     if (ABS(D)>0.) {
162     D=1./D;
163     out->diag[i*9 ]= (A22*A33-A23*A32)*D;
164     out->diag[i*9+1]= (A31*A23-A21*A33)*D;
165     out->diag[i*9+2]= (A21*A32-A31*A22)*D;
166     out->diag[i*9+3]= (A13*A32-A12*A33)*D;
167     out->diag[i*9+4]= (A11*A33-A31*A13)*D;
168     out->diag[i*9+5]= (A12*A31-A11*A32)*D;
169     out->diag[i*9+6]= (A12*A23-A13*A22)*D;
170     out->diag[i*9+7]= (A13*A21-A11*A23)*D;
171     out->diag[i*9+8]= (A11*A22-A12*A21)*D;
172     } else {
173     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
174     }
175     break;
176     }
177     }
178     out->main_iptr[i]=iptr_main;
179     }
180     }
181     }
182    
183     time_fac=Paso_timer()-time0;
184     }
185     }
186     if (Paso_noError()) {
187     if (verbose) {
188     printf("GS_MPI: %d color used \n",out->num_colors);
189     printf("timing: GS_MPI: coloring/elemination : %e/%e\n",time_color,time_fac);
190     }
191     return out;
192     } else {
193     Paso_Solver_GSMPI_free(out);
194     return NULL;
195     }
196     }
197    
198     void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b);
199    
200     /**************************************************************/
201    
202     /* apply MPI versioned GS
203    
204     in fact it solves Ax=b in two steps:
205     step1: among different nodes (MPI ranks), we use block Jacobi
206     x{k} = x{k-1} + D{-1}(b-A*x{k-1})
207     => D*x{k} = b - (E+F)x{k-1}
208     where matrix D is (let p be the number of nodes):
209     --------------------
210     |A1| | | ... | |
211     --------------------
212     | |A2| | ... | |
213     --------------------
214     | | |A3| ... | |
215     --------------------
216     | ... |
217     --------------------
218     | | | | ... |Ap|
219     --------------------
220     and Ai (i \in [1,p]) represents the mainBlock of matrix
221     A on node i. Matrix (E+F) is represented as the coupleBlock
222     of matrix A on each node (annotated as ACi).
223     Therefore, step1 can be turned into the following for node i:
224     => Ai * x{k} = b - ACi * x{k-1}
225     where both x{k} and b are the segment of x and b on node i,
226     and x{k-1} is the old segment values of x on all other nodes.
227    
228     step2: inside node i, we use Gauss-Seidel
229     let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
230     by using symetrix Gauss-Seidel, this can be solved in a forward
231     phase and a backward phase:
232     forward phase: x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
233     backward phase: x{m+1} = diag(Ai){-1} (b' - F*{m+1} - E*x{m})
234     */
235    
236     void Paso_Solver_solveGSMPI(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
237     register dim_t i;
238     dim_t n_block=gs->n_block;
239     dim_t n=gs->n;
240     dim_t sweeps=gs->sweeps;
241    
242     /*xi{0} = 0
243     xi{1} = Ai{-1} * bi
244     xi{2} = Ai{-1} * (bi - ACi * xj{1})
245     ...
246     xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
247     #pragma omp parallel for private(i) schedule(static)
248     for (i=0;i<n*n_block;++i) x[i]=0;
249    
250     Paso_Solver_GS_local(A,gs,x,b);
251    
252     if (sweeps > 1) {
253     double *new_b=MEMALLOC(n*n_block,double);
254     double *remote_x=NULL;
255    
256     while (sweeps > 1) {
257     /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are remote
258     value of x, which requires MPI communications */
259     #pragma omp parallel for private(i) schedule(static)
260     for (i=0;i<n*n_block;++i) new_b[i]=b[i];
261    
262     if (A->col_coupleBlock->pattern->ptr!=NULL){
263     Paso_SystemMatrix_startCollect(A,x);
264     remote_x=Paso_SystemMatrix_finishCollect(A);
265     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
266     }
267    
268     Paso_Solver_GS_local(A,gs,x,new_b);
269     sweeps --;
270     }
271     MEMFREE(new_b);
272     }
273    
274     return;
275     }
276    
277     /* Locally solve A'x=b, where A' is the mainBlock of global system matrix A */
278     void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
279     dim_t n_block=gs->n_block;
280     dim_t n=gs->n;
281     double sum0, sum1, sum2, X0, X1, X2;
282     double *val=A->mainBlock->val;
283     double *diag=gs->diag;
284     index_t *ptr=gs->pattern->ptr;
285     index_t *index=gs->pattern->index;
286     dim_t i, j, iptr, xi, ai, xj, aj;
287     #ifdef _OPENMP
288     dim_t nt, len, rest, t, istart, iend;
289    
290     nt=omp_get_max_threads();
291     len=n/nt;
292     rest=n-len*nt;
293     #endif
294     /* TO BE DONE: add handler to deal with the case "n" is too small
295     to be worth run in threads. */
296    
297     #ifdef _OPENMP
298     /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are x value
299     computed by other threads in previous sweep */
300     if (nt > 1) {
301     if (n_block == 1){
302     #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
303     for (t=0; t<nt; t++) {
304     istart=len*t+MIN(t,rest);
305     iend=istart+len+(t<rest ? 1:0);
306     for (i=istart; i<iend; i++){
307     sum0=b[i];
308     for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
309     j=index[iptr];
310     if (j<istart || j>=iend){
311     sum0 = sum0 - val[iptr] * x[j];
312     }
313     }
314     b[i]=sum0;
315     }
316     }
317     } else if (n_block == 2) {
318     #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
319     for (t=0; t<nt; t++) {
320     istart=len*t+MIN(t,rest);
321     iend=istart+len+(t<rest ? 1:0);
322     for (i=istart; i<iend; i++){
323     xi=2*i;
324     sum0=b[xi];
325     sum1=b[xi+1];
326     for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
327     j=index[iptr];
328     if (j<istart || j>=iend){
329     xj=2*j;
330     aj=4*iptr;
331     X0=x[xj];
332     X1=x[xj+1];
333     sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
334     sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
335     }
336     }
337     b[xi]=sum0;
338     b[xi+1]=sum1;
339     }
340     }
341     } else if (n_block == 3) {
342     #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
343     for (t=0; t<nt; t++) {
344     istart=len*t+MIN(t,rest);
345     iend=istart+len+(t<rest ? 1:0);
346     for (i=istart; i<iend; i++){
347     xi=3*i;
348     sum0=b[xi];
349     sum1=b[xi+1];
350     sum2=b[xi+2];
351     for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
352     j=index[iptr];
353     if (j<istart || j>=iend){
354     xj=3*j;
355     aj=9*iptr;
356     X0=x[xj];
357     X1=x[xj+1];
358     X2=x[xj+2];
359     sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
360     sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
361     sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
362     }
363     }
364     b[xi]=sum0;
365     b[xi+1]=sum1;
366     b[xi+2]=sum2;
367     }
368     }
369     }
370     }
371     #endif
372    
373     /* step1: forward iteration
374     x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
375     /* One Guass-Seidel itertion
376     In case of forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1})
377     => into a loop (without coloring):
378     for i in [0,n-1] do
379     x_i = (1/a_ii) *
380     (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
381     where the first "\sum" sums up newly updated values of x elements
382     while the second "\sum" sums up previous (old) values of x elements.
383     In case of backward iteration x{k} = D{-1}(b - F*x{k} - E*x{k-1})
384     */
385     if (n_block == 1){
386     #ifdef _OPENMP
387     #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
388     for (t=0; t<nt; t++) {
389     istart=len*t+MIN(t,rest);
390     iend=istart+len+(t<rest ? 1:0);
391     for (i=istart; i<iend; i++){
392     #else
393     for (i=0; i<n; i++) {
394     #endif
395     sum0 = b[i];
396     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
397     j=index[iptr];
398     #ifdef _OPENMP
399     if (j >= istart && j < iend && i != j){
400     #else
401     if (i != j) {
402     #endif
403     sum0 = sum0 - val[iptr] * x[j];
404     }
405     }
406     x[i] = sum0*diag[i];
407     #ifdef _OPENMP
408     }
409     }
410     #else
411     }
412     #endif
413     } else if (n_block == 2) {
414     #ifdef _OPENMP
415     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
416     for (t=0; t<nt; t++) {
417     istart=len*t+MIN(t,rest);
418     iend=istart+len+(t<rest ? 1:0);
419     for (i=istart; i<iend; i++){
420     #else
421     for (i=0; i<n; i++) {
422     #endif
423     xi=2*i;
424     ai=4*i;
425     sum0 = b[xi];
426     sum1 = b[xi+1];
427     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
428     j=index[iptr];
429     #ifdef _OPENMP
430     if (j >= istart && j < iend && i != j){
431     #else
432     if (i != j) {
433     #endif
434     xj=2*j;
435     aj=4*iptr;
436     X0=x[xj];
437     X1=x[xj+1];
438     sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
439     sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
440     }
441     }
442     x[xi ]=diag[ai ]*sum0 + diag[ai+2]*sum1;
443     x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
444     #ifdef _OPENMP
445     }
446     }
447     #else
448     }
449     #endif
450     } else if (n_block == 3) {
451     #ifdef _OPENMP
452     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
453     for (t=0; t<nt; t++) {
454     istart=len*t+MIN(t,rest);
455     iend=istart+len+(t<rest ? 1:0);
456     for (i=istart; i<iend; i++){
457     #else
458     for (i=0; i<n; i++) {
459     #endif
460     xi=3*i;
461     ai=9*i;
462     sum0 = b[xi];
463     sum1 = b[xi+1];
464     sum2 = b[xi+2];
465     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
466     j=index[iptr];
467     #ifdef _OPENMP
468     if (j >= istart && j < iend && i != j){
469     #else
470     if (i != j) {
471     #endif
472     xj=3*j;
473     aj=9*iptr;
474     X0=x[xj];
475     X1=x[xj+1];
476     X2=x[xj+2];
477     sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
478     sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
479     sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
480     }
481     }
482     x[xi ] = diag[ai ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
483     x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
484     x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
485     #ifdef _OPENMP
486     }
487     }
488     #else
489     }
490     #endif
491     }
492    
493     /* step2: backward iteration
494     x{k} = D{-1}(b - F*x{k} - E*x{k-1}) */
495     if (n_block == 1){
496     #ifdef _OPENMP
497     #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
498     for (t=nt-1; t>=0; t--) {
499     istart=len*t+MIN(t,rest);
500     iend=istart+len+(t<rest ? 1:0);
501     for (i=iend-1; i>=istart; i--){
502     #else
503     for (i=n-1; i>=0; i--) {
504     #endif
505     sum0 = b[i];
506     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
507     j=index[iptr];
508     #ifdef _OPENMP
509     if (j >= istart && j < iend && i != j){
510     #else
511     if (i != j) {
512     #endif
513     sum0 = sum0 - val[iptr] * x[j];
514     }
515     }
516     x[i] = sum0*diag[i];
517     #ifdef _OPENMP
518     }
519     }
520     #else
521     }
522     #endif
523     } else if (n_block == 2) {
524     #ifdef _OPENMP
525     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
526     for (t=nt-1; t>=0; t--) {
527     istart=len*t+MIN(t,rest);
528     iend=istart+len+(t<rest ? 1:0);
529     for (i=iend-1; i>=istart; i--){
530     #else
531     for (i=n-1; i>=0; i--) {
532     #endif
533     xi=2*i;
534     ai=4*i;
535     sum0 = b[xi];
536     sum1 = b[xi+1];
537     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
538     j=index[iptr];
539     #ifdef _OPENMP
540     if (j >= istart && j < iend && i != j){
541     #else
542     if (i != j) {
543     #endif
544     xj=2*j;
545     aj=4*iptr;
546     X0=x[xj];
547     X1=x[xj+1];
548     sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
549     sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
550     }
551     }
552     x[xi ]=diag[ai ]*sum0 + diag[ai+2]*sum1;
553     x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
554     #ifdef _OPENMP
555     }
556     }
557     #else
558     }
559     #endif
560     } else if (n_block == 3) {
561     #ifdef _OPENMP
562     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
563     for (t=nt-1; t>=0; t--) {
564     istart=len*t+MIN(t,rest);
565     iend=istart+len+(t<rest ? 1:0);
566     for (i=iend-1; i>=istart; i--){
567     #else
568     for (i=n-1; i>=0; i--) {
569     #endif
570     xi=3*i;
571     ai=9*i;
572     sum0 = b[xi];
573     sum1 = b[xi+1];
574     sum2 = b[xi+2];
575     for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
576     j=index[iptr];
577     #ifdef _OPENMP
578     if (j >= istart && j < iend && i != j){
579     #else
580     if (i != j) {
581     #endif
582     xj=3*j;
583     aj=9*iptr;
584     X0=x[xj];
585     X1=x[xj+1];
586     X2=x[xj+2];
587     sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
588     sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
589     sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
590     }
591     }
592     x[xi ] = diag[ai ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
593     x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
594     x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
595     #ifdef _OPENMP
596     }
597     }
598     #else
599     }
600     #endif
601     }
602    
603     return;
604     }
605    

  ViewVC Help
Powered by ViewVC 1.1.26