/[escript]/trunk/paso/src/GSMPI.c
ViewVC logotype

Annotation of /trunk/paso/src/GSMPI.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3094 - (hide annotations)
Fri Aug 13 08:38:06 2010 UTC (9 years, 2 months ago) by gross
Original Path: trunk/paso/src/Solver_GSMPI.c
File MIME type: text/plain
File size: 22059 byte(s)
The MPI and sequential GAUSS_SEIDEL have been merged.
The coloring and main diagonal pointer are now managed by the pattern, which means that they are calculated once only, even if the preconditioner is deleted.



1 lgao 3051
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: GS preconditioner with reordering */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003-2010 */
22     /* Author: l.gao@uq.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "SystemMatrix.h"
28     #include "Solver.h"
29     #include "PasoUtil.h"
30    
31     #include <stdio.h>
32    
33     /**************************************************************/
34    
35     /* free all memory used by GS */
36    
37     void Paso_Solver_GSMPI_free(Paso_Solver_GS * in) {
38     if (in!=NULL) {
39     MEMFREE(in->colorOf);
40     Paso_SparseMatrix_free(in->factors);
41     MEMFREE(in->diag);
42     MEMFREE(in->main_iptr);
43     Paso_Pattern_free(in->pattern);
44     MEMFREE(in);
45     }
46     }
47    
/* ========================================================================================= */
49 lgao 3051 /**************************************************************/
50    
51     /* gs->diag saves the matrix of D{-1}
52     this is different from Paso_Solver_getGS(), in which, gs->diag
53     is the matrix D.
54     */
55     Paso_Solver_GS* Paso_Solver_getGSMPI(Paso_SparseMatrix * A,bool_t verbose) {
56     dim_t n=A->numRows;
57     dim_t n_block=A->row_block_size;
58     dim_t block_size=A->block_size;
59     register index_t i,iptr_main=0,iPtr;
60     double time0=0,time_color=0,time_fac=0;
61     double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;
62    
63     /* allocations: */
64     /* printf("n_block= %d, n= %d\n", n_block, n); */
65     Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
66     if (Paso_checkPtr(out)) return NULL;
67     out->colorOf=MEMALLOC(n,index_t);
68     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
69     /*out->diag=MEMALLOC(A->len,double);*/
70     out->main_iptr=MEMALLOC(n,index_t);
71     out->pattern=Paso_Pattern_getReference(A->pattern);
72     out->factors=Paso_SparseMatrix_getReference(A);
73     out->n_block=n_block;
74     out->n=n;
75    
76     if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
77     time0=Paso_timer();
78     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
79     time_color=Paso_timer()-time0;
80    
81     if (Paso_noError()) {
82     time0=Paso_timer();
83    
84     if (! (Paso_checkPtr(out->diag))) {
85     if (n_block==1) {
86     #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
87     for (i = 0; i < A->pattern->numOutput; i++) {
88     iptr_main=0;
89     out->diag[i]=1.;
90     /* find main diagonal */
91     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
92     if (A->pattern->index[iPtr]==i) {
93     iptr_main=iPtr;
94     if (ABS(A->val[iPtr]) > 0.) {
95     out->diag[i]=1./(A->val[iPtr]);
96     } else {
97     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
98     }
99     break;
100     }
101     }
102     out->main_iptr[i]=iptr_main;
103     }
104     } else if (n_block==2) {
105     #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
106     for (i = 0; i < A->pattern->numOutput; i++) {
107     out->diag[i*4+0]= 1.;
108     out->diag[i*4+1]= 0.;
109     out->diag[i*4+2]= 0.;
110     out->diag[i*4+3]= 1.;
111     iptr_main=0;
112     /* find main diagonal */
113     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
114     if (A->pattern->index[iPtr]==i) {
115     iptr_main=iPtr;
116     A11=A->val[iPtr*4];
117     A21=A->val[iPtr*4+1];
118     A12=A->val[iPtr*4+2];
119     A22=A->val[iPtr*4+3];
120     D = A11*A22-A12*A21;
121     if (ABS(D)>0.) {
122     D=1./D;
123     out->diag[i*4 ]= A22*D;
124     out->diag[i*4+1]= -A21*D;
125     out->diag[i*4+2]= -A12*D;
126     out->diag[i*4+3]= A11*D;
127     } else {
128     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
129     }
130     break;
131     }
132     }
133     out->main_iptr[i]=iptr_main;
134     }
135     } else if (n_block==3) {
136     #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
137     for (i = 0; i < A->pattern->numOutput; i++) {
138     out->diag[i*9 ]=1.;
139     out->diag[i*9+1]=0.;
140     out->diag[i*9+2]=0.;
141     out->diag[i*9+3]=0.;
142     out->diag[i*9+4]=1.;
143     out->diag[i*9+5]=0.;
144     out->diag[i*9+6]=0.;
145     out->diag[i*9+7]=0.;
146     out->diag[i*9+8]=1.;
147     iptr_main=0;
148     /* find main diagonal */
149     for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
150     if (A->pattern->index[iPtr]==i) {
151     iptr_main=iPtr;
152     A11=A->val[iPtr*9 ];
153     A21=A->val[iPtr*9+1];
154     A31=A->val[iPtr*9+2];
155     A12=A->val[iPtr*9+3];
156     A22=A->val[iPtr*9+4];
157     A32=A->val[iPtr*9+5];
158     A13=A->val[iPtr*9+6];
159     A23=A->val[iPtr*9+7];
160     A33=A->val[iPtr*9+8];
161     D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
162     if (ABS(D)>0.) {
163     D=1./D;
164     out->diag[i*9 ]= (A22*A33-A23*A32)*D;
165     out->diag[i*9+1]= (A31*A23-A21*A33)*D;
166     out->diag[i*9+2]= (A21*A32-A31*A22)*D;
167     out->diag[i*9+3]= (A13*A32-A12*A33)*D;
168     out->diag[i*9+4]= (A11*A33-A31*A13)*D;
169     out->diag[i*9+5]= (A12*A31-A11*A32)*D;
170     out->diag[i*9+6]= (A12*A23-A13*A22)*D;
171     out->diag[i*9+7]= (A13*A21-A11*A23)*D;
172     out->diag[i*9+8]= (A11*A22-A12*A21)*D;
173     } else {
174     Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
175     }
176     break;
177     }
178     }
179     out->main_iptr[i]=iptr_main;
180     }
181     }
182     }
183    
184     time_fac=Paso_timer()-time0;
185     }
186     }
187     if (Paso_noError()) {
188     if (verbose) {
189     printf("GS_MPI: %d color used \n",out->num_colors);
190     printf("timing: GS_MPI: coloring/elemination : %e/%e\n",time_color,time_fac);
191     }
192     return out;
193     } else {
194     Paso_Solver_GSMPI_free(out);
195     return NULL;
196     }
197     }
198    
199     void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b);
200    
201     /**************************************************************/
202    
203     /* apply MPI versioned GS
204    
205     in fact it solves Ax=b in two steps:
206     step1: among different nodes (MPI ranks), we use block Jacobi
207     x{k} = x{k-1} + D{-1}(b-A*x{k-1})
208     => D*x{k} = b - (E+F)x{k-1}
209     where matrix D is (let p be the number of nodes):
210     --------------------
211     |A1| | | ... | |
212     --------------------
213     | |A2| | ... | |
214     --------------------
215     | | |A3| ... | |
216     --------------------
217     | ... |
218     --------------------
219     | | | | ... |Ap|
220     --------------------
221     and Ai (i \in [1,p]) represents the mainBlock of matrix
222     A on node i. Matrix (E+F) is represented as the coupleBlock
223     of matrix A on each node (annotated as ACi).
224     Therefore, step1 can be turned into the following for node i:
225     => Ai * x{k} = b - ACi * x{k-1}
226     where both x{k} and b are the segment of x and b on node i,
227     and x{k-1} is the old segment values of x on all other nodes.
228    
229     step2: inside node i, we use Gauss-Seidel
230     let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
231     by using symetrix Gauss-Seidel, this can be solved in a forward
232     phase and a backward phase:
233     forward phase: x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
234     backward phase: x{m+1} = diag(Ai){-1} (b' - F*{m+1} - E*x{m})
235     */
236    
237     void Paso_Solver_solveGSMPI(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
238     register dim_t i;
239     dim_t n_block=gs->n_block;
240     dim_t n=gs->n;
241     dim_t sweeps=gs->sweeps;
242    
243     /*xi{0} = 0
244     xi{1} = Ai{-1} * bi
245     xi{2} = Ai{-1} * (bi - ACi * xj{1})
246     ...
247     xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
248     #pragma omp parallel for private(i) schedule(static)
249     for (i=0;i<n*n_block;++i) x[i]=0;
250    
251     Paso_Solver_GS_local(A,gs,x,b);
252    
253     if (sweeps > 1) {
254     double *new_b=MEMALLOC(n*n_block,double);
255     double *remote_x=NULL;
256    
257     while (sweeps > 1) {
258     /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are remote
259     value of x, which requires MPI communications */
260     #pragma omp parallel for private(i) schedule(static)
261     for (i=0;i<n*n_block;++i) new_b[i]=b[i];
262    
263     if (A->col_coupleBlock->pattern->ptr!=NULL){
264     Paso_SystemMatrix_startCollect(A,x);
265     remote_x=Paso_SystemMatrix_finishCollect(A);
266     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
267     }
268    
269     Paso_Solver_GS_local(A,gs,x,new_b);
270     sweeps --;
271     }
272     MEMFREE(new_b);
273     }
274    
275     return;
276     }
277    
278     /* Locally solve A'x=b, where A' is the mainBlock of global system matrix A */
/* Locally solve A'x=b, where A' is the mainBlock of global system matrix A.
 *
 * Performs one symmetric Gauss-Seidel iteration (a forward sweep followed
 * by a backward sweep) using the precomputed inverted diagonal blocks in
 * gs->diag. Supports block sizes 1, 2 and 3.
 *
 * Threading: under OpenMP the rows are partitioned into nt contiguous
 * chunks, one per thread; each thread runs Gauss-Seidel only within its
 * own chunk, while couplings to rows outside the chunk are folded into
 * the right-hand side beforehand (block-Jacobi between threads,
 * Gauss-Seidel within a thread).
 *
 * NOTE(review): in the multi-threaded path the pre-adjustment step writes
 * back into b (b[i]=sum0), so the caller's b is modified — confirm all
 * callers pass a scratch copy or tolerate this.
 */
void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
    dim_t n_block=gs->n_block;
    dim_t n=gs->n;
    double sum0, sum1, sum2, X0, X1, X2;
    double *val=A->mainBlock->val;     /* CSR values of the local main block */
    double *diag=gs->diag;             /* inverted diagonal blocks D^{-1}    */
    index_t *ptr=gs->pattern->ptr;
    index_t *index=gs->pattern->index;
    dim_t i, j, iptr, xi, ai, xj, aj;
#ifdef _OPENMP
    dim_t nt, len, rest, t, istart, iend;

    /* static row partition: thread t owns rows [istart,iend) with
       len=n/nt rows each and the first 'rest' threads taking one extra */
    nt=omp_get_max_threads();
    len=n/nt;
    rest=n-len*nt;
#endif
    /* TO BE DONE: add handler to deal with the case "n" is too small
       to be worth run in threads. */

#ifdef _OPENMP
    /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are x values
       computed by other threads in the previous sweep: couplings that
       cross a chunk boundary are moved into the right-hand side so each
       thread's sweep below can ignore them */
    if (nt > 1) {
        if (n_block == 1){
            #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
            for (t=0; t<nt; t++) {
                istart=len*t+MIN(t,rest);
                iend=istart+len+(t<rest ? 1:0);
                for (i=istart; i<iend; i++){
                    sum0=b[i];
                    for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                        j=index[iptr];
                        if (j<istart || j>=iend){
                            /* column owned by another thread */
                            sum0 = sum0 - val[iptr] * x[j];
                        }
                    }
                    b[i]=sum0;
                }
            }
        } else if (n_block == 2) {
            #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
            for (t=0; t<nt; t++) {
                istart=len*t+MIN(t,rest);
                iend=istart+len+(t<rest ? 1:0);
                for (i=istart; i<iend; i++){
                    xi=2*i;
                    sum0=b[xi];
                    sum1=b[xi+1];
                    for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                        j=index[iptr];
                        if (j<istart || j>=iend){
                            xj=2*j;      /* offset into x for block column j */
                            aj=4*iptr;   /* offset into val for 2x2 block    */
                            X0=x[xj];
                            X1=x[xj+1];
                            sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
                            sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
                        }
                    }
                    b[xi]=sum0;
                    b[xi+1]=sum1;
                }
            }
        } else if (n_block == 3) {
            #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
            for (t=0; t<nt; t++) {
                istart=len*t+MIN(t,rest);
                iend=istart+len+(t<rest ? 1:0);
                for (i=istart; i<iend; i++){
                    xi=3*i;
                    sum0=b[xi];
                    sum1=b[xi+1];
                    sum2=b[xi+2];
                    for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                        j=index[iptr];
                        if (j<istart || j>=iend){
                            xj=3*j;      /* offset into x for block column j */
                            aj=9*iptr;   /* offset into val for 3x3 block    */
                            X0=x[xj];
                            X1=x[xj+1];
                            X2=x[xj+2];
                            sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
                            sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
                            sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
                        }
                    }
                    b[xi]=sum0;
                    b[xi+1]=sum1;
                    b[xi+2]=sum2;
                }
            }
        }
    }
#endif

    /* step1: forward iteration
       x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
    /* One Gauss-Seidel iteration
       In case of forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1})
       => into a loop (without coloring):
       for i in [0,n-1] do
          x_i = (1/a_ii) *
                (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
       where the first "\sum" sums up newly updated values of x elements
       while the second "\sum" sums up previous (old) values of x elements.
       In case of backward iteration x{k} = D{-1}(b - F*x{k} - E*x{k-1})
       Under OpenMP, the extra condition (j >= istart && j < iend) restricts
       each thread to couplings inside its own chunk; cross-chunk couplings
       were already folded into b above. */
    if (n_block == 1){
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
        for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
#else
        for (i=0; i<n; i++) {
#endif
            sum0 = b[i];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    sum0 = sum0 - val[iptr] * x[j];
                }
            }
            x[i] = sum0*diag[i];
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    } else if (n_block == 2) {
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
        for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
#else
        for (i=0; i<n; i++) {
#endif
            xi=2*i;
            ai=4*i;   /* offset into diag for row i's inverted 2x2 block */
            sum0 = b[xi];
            sum1 = b[xi+1];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    xj=2*j;
                    aj=4*iptr;
                    X0=x[xj];
                    X1=x[xj+1];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
                }
            }
            /* x_i = D^{-1} * sum (2x2 block multiply) */
            x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
            x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    } else if (n_block == 3) {
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
        for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
#else
        for (i=0; i<n; i++) {
#endif
            xi=3*i;
            ai=9*i;   /* offset into diag for row i's inverted 3x3 block */
            sum0 = b[xi];
            sum1 = b[xi+1];
            sum2 = b[xi+2];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    xj=3*j;
                    aj=9*iptr;
                    X0=x[xj];
                    X1=x[xj+1];
                    X2=x[xj+2];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
                    sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
                }
            }
            /* x_i = D^{-1} * sum (3x3 block multiply) */
            x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
            x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
            x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    }

    /* step2: backward iteration
       x{k} = D{-1}(b - F*x{k} - E*x{k-1})
       same structure as the forward sweep but rows are visited in
       reverse order (threads and rows within each thread's chunk).
       NOTE(review): the reverse loops assume dim_t is a signed type
       (i>=0 / i>=istart would never terminate for unsigned) — confirm. */
    if (n_block == 1){
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
        for (t=nt-1; t>=0; t--) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=iend-1; i>=istart; i--){
#else
        for (i=n-1; i>=0; i--) {
#endif
            sum0 = b[i];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    sum0 = sum0 - val[iptr] * x[j];
                }
            }
            x[i] = sum0*diag[i];
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    } else if (n_block == 2) {
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
        for (t=nt-1; t>=0; t--) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=iend-1; i>=istart; i--){
#else
        for (i=n-1; i>=0; i--) {
#endif
            xi=2*i;
            ai=4*i;
            sum0 = b[xi];
            sum1 = b[xi+1];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    xj=2*j;
                    aj=4*iptr;
                    X0=x[xj];
                    X1=x[xj+1];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
                }
            }
            x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
            x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    } else if (n_block == 3) {
#ifdef _OPENMP
        #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
        for (t=nt-1; t>=0; t--) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=iend-1; i>=istart; i--){
#else
        for (i=n-1; i>=0; i--) {
#endif
            xi=3*i;
            ai=9*i;
            sum0 = b[xi];
            sum1 = b[xi+1];
            sum2 = b[xi+2];
            for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
                j=index[iptr];
#ifdef _OPENMP
                if (j >= istart && j < iend && i != j){
#else
                if (i != j) {
#endif
                    xj=3*j;
                    aj=9*iptr;
                    X0=x[xj];
                    X1=x[xj+1];
                    X2=x[xj+2];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
                    sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
                }
            }
            x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
            x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
            x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
            }
        }
#else
        }
#endif
    }

    return;
}
606    

  ViewVC Help
Powered by ViewVC 1.1.26