/[escript]/trunk/paso/src/Solver_GSMPI.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_GSMPI.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3094 - (show annotations)
Fri Aug 13 08:38:06 2010 UTC (8 years, 11 months ago) by gross
File MIME type: text/plain
File size: 22059 byte(s)
The MPI and sequential GAUSS_SEIDEL have been merged.
The coloring and main diagonal pointer are now managed by the pattern, which means that they are calculated once only even if the preconditioner is deleted.



1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: GS preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003-2010 */
22 /* Author: l.gao@uq.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "SystemMatrix.h"
28 #include "Solver.h"
29 #include "PasoUtil.h"
30
31 #include <stdio.h>
32
33 /**************************************************************/
34
35 /* free all memory used by GS */
36
37 void Paso_Solver_GSMPI_free(Paso_Solver_GS * in) {
38 if (in!=NULL) {
39 MEMFREE(in->colorOf);
40 Paso_SparseMatrix_free(in->factors);
41 MEMFREE(in->diag);
42 MEMFREE(in->main_iptr);
43 Paso_Pattern_free(in->pattern);
44 MEMFREE(in);
45 }
46 }
47
48 /* ========================================================================= */
49 /**************************************************************/
50
51 /* gs->diag saves the matrix of D{-1}
52 this is different from Paso_Solver_getGS(), in which, gs->diag
53 is the matrix D.
54 */
/* Builds a Gauss-Seidel preconditioner for the local sparse matrix A:
   colors the matrix pattern, locates the main-diagonal entry of every row,
   and stores the EXPLICIT INVERSE of each diagonal block in out->diag
   (scalar reciprocal for 1x1, adjugate/determinant for 2x2 and 3x3 blocks).
   Takes references (not copies) of A and its pattern.
   Returns NULL on allocation failure; on other errors the Paso error flag
   is set and the partially built object is freed before returning NULL.
   NOTE(review): out->sweeps is not initialized here although
   Paso_Solver_solveGSMPI reads it -- presumably set by the caller; verify. */
Paso_Solver_GS* Paso_Solver_getGSMPI(Paso_SparseMatrix * A,bool_t verbose) {
  dim_t n=A->numRows;
  dim_t n_block=A->row_block_size;
  dim_t block_size=A->block_size;       /* entries per block = n_block*n_block */
  register index_t i,iptr_main=0,iPtr;
  double time0=0,time_color=0,time_fac=0;
  double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;

  /* allocations: */
  /* printf("n_block= %d, n= %d\n", n_block, n); */
  Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
  if (Paso_checkPtr(out)) return NULL;
  out->colorOf=MEMALLOC(n,index_t);
  /* one inverted diagonal block per row */
  out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
  /*out->diag=MEMALLOC(A->len,double);*/
  out->main_iptr=MEMALLOC(n,index_t);
  out->pattern=Paso_Pattern_getReference(A->pattern);
  out->factors=Paso_SparseMatrix_getReference(A);
  out->n_block=n_block;
  out->n=n;

  if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
     time0=Paso_timer();
     /* coloring is cached in the pattern, so this is cheap on reuse */
     Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
     time_color=Paso_timer()-time0;

     if (Paso_noError()) {
        time0=Paso_timer();

        if (! (Paso_checkPtr(out->diag))) {
           if (n_block==1) {
              /* scalar case: diag[i] = 1/a_ii; rows without a (nonzero) main
                 diagonal keep the identity value 1. and main_iptr 0.
                 NOTE(review): Paso_setError is invoked inside the parallel
                 region -- assumed thread-safe; verify. */
              #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
              for (i = 0; i < A->pattern->numOutput; i++) {
                 iptr_main=0;
                 out->diag[i]=1.;
                 /* find main diagonal */
                 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                    if (A->pattern->index[iPtr]==i) {
                       iptr_main=iPtr;
                       if (ABS(A->val[iPtr]) > 0.) {
                          out->diag[i]=1./(A->val[iPtr]);
                       } else {
                          Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                       }
                       break;
                    }
                 }
                 out->main_iptr[i]=iptr_main;
              }
           } else if (n_block==2) {
              /* 2x2 blocks (column-major storage): invert via adjugate over
                 determinant; preset to the identity matrix as fallback */
              #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
              for (i = 0; i < A->pattern->numOutput; i++) {
                 out->diag[i*4+0]= 1.;
                 out->diag[i*4+1]= 0.;
                 out->diag[i*4+2]= 0.;
                 out->diag[i*4+3]= 1.;
                 iptr_main=0;
                 /* find main diagonal */
                 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                    if (A->pattern->index[iPtr]==i) {
                       iptr_main=iPtr;
                       A11=A->val[iPtr*4];
                       A21=A->val[iPtr*4+1];
                       A12=A->val[iPtr*4+2];
                       A22=A->val[iPtr*4+3];
                       D = A11*A22-A12*A21;
                       if (ABS(D)>0.) {
                          D=1./D;
                          out->diag[i*4  ]=  A22*D;
                          out->diag[i*4+1]= -A21*D;
                          out->diag[i*4+2]= -A12*D;
                          out->diag[i*4+3]=  A11*D;
                       } else {
                          Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                       }
                       break;
                    }
                 }
                 out->main_iptr[i]=iptr_main;
              }
           } else if (n_block==3) {
              /* 3x3 blocks (column-major storage): inverse = adjugate / det,
                 det expanded along the first row; identity preset as fallback */
              #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
              for (i = 0; i < A->pattern->numOutput; i++) {
                 out->diag[i*9  ]=1.;
                 out->diag[i*9+1]=0.;
                 out->diag[i*9+2]=0.;
                 out->diag[i*9+3]=0.;
                 out->diag[i*9+4]=1.;
                 out->diag[i*9+5]=0.;
                 out->diag[i*9+6]=0.;
                 out->diag[i*9+7]=0.;
                 out->diag[i*9+8]=1.;
                 iptr_main=0;
                 /* find main diagonal */
                 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                    if (A->pattern->index[iPtr]==i) {
                       iptr_main=iPtr;
                       A11=A->val[iPtr*9  ];
                       A21=A->val[iPtr*9+1];
                       A31=A->val[iPtr*9+2];
                       A12=A->val[iPtr*9+3];
                       A22=A->val[iPtr*9+4];
                       A32=A->val[iPtr*9+5];
                       A13=A->val[iPtr*9+6];
                       A23=A->val[iPtr*9+7];
                       A33=A->val[iPtr*9+8];
                       D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
                       if (ABS(D)>0.) {
                          D=1./D;
                          out->diag[i*9  ]= (A22*A33-A23*A32)*D;
                          out->diag[i*9+1]= (A31*A23-A21*A33)*D;
                          out->diag[i*9+2]= (A21*A32-A31*A22)*D;
                          out->diag[i*9+3]= (A13*A32-A12*A33)*D;
                          out->diag[i*9+4]= (A11*A33-A31*A13)*D;
                          out->diag[i*9+5]= (A12*A31-A11*A32)*D;
                          out->diag[i*9+6]= (A12*A23-A13*A22)*D;
                          out->diag[i*9+7]= (A13*A21-A11*A23)*D;
                          out->diag[i*9+8]= (A11*A22-A12*A21)*D;
                       } else {
                          Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                       }
                       break;
                    }
                 }
                 out->main_iptr[i]=iptr_main;
              }
           }
        }

        time_fac=Paso_timer()-time0;
     }
  }
  if (Paso_noError()) {
     if (verbose) {
        printf("GS_MPI: %d color used \n",out->num_colors);
        printf("timing: GS_MPI: coloring/elemination : %e/%e\n",time_color,time_fac);
     }
     return out;
  } else {
     Paso_Solver_GSMPI_free(out);
     return NULL;
  }
}
198
199 void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b);
200
201 /**************************************************************/
202
203 /* apply MPI versioned GS
204
205 in fact it solves Ax=b in two steps:
206 step1: among different nodes (MPI ranks), we use block Jacobi
207 x{k} = x{k-1} + D{-1}(b-A*x{k-1})
208 => D*x{k} = b - (E+F)x{k-1}
209 where matrix D is (let p be the number of nodes):
210 --------------------
211 |A1| | | ... | |
212 --------------------
213 | |A2| | ... | |
214 --------------------
215 | | |A3| ... | |
216 --------------------
217 | ... |
218 --------------------
219 | | | | ... |Ap|
220 --------------------
221 and Ai (i \in [1,p]) represents the mainBlock of matrix
222 A on node i. Matrix (E+F) is represented as the coupleBlock
223 of matrix A on each node (annotated as ACi).
224 Therefore, step1 can be turned into the following for node i:
225 => Ai * x{k} = b - ACi * x{k-1}
226 where both x{k} and b are the segment of x and b on node i,
227 and x{k-1} is the old segment values of x on all other nodes.
228
229 step2: inside node i, we use Gauss-Seidel
230 let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
231 by using symmetric Gauss-Seidel, this can be solved in a forward
232 phase and a backward phase:
233 forward phase: x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
234 backward phase: x{m+1} = diag(Ai){-1} (b' - F*x{m+1} - E*x{m})
235 */
236
237 void Paso_Solver_solveGSMPI(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
238 register dim_t i;
239 dim_t n_block=gs->n_block;
240 dim_t n=gs->n;
241 dim_t sweeps=gs->sweeps;
242
243 /*xi{0} = 0
244 xi{1} = Ai{-1} * bi
245 xi{2} = Ai{-1} * (bi - ACi * xj{1})
246 ...
247 xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
248 #pragma omp parallel for private(i) schedule(static)
249 for (i=0;i<n*n_block;++i) x[i]=0;
250
251 Paso_Solver_GS_local(A,gs,x,b);
252
253 if (sweeps > 1) {
254 double *new_b=MEMALLOC(n*n_block,double);
255 double *remote_x=NULL;
256
257 while (sweeps > 1) {
258 /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are remote
259 value of x, which requires MPI communications */
260 #pragma omp parallel for private(i) schedule(static)
261 for (i=0;i<n*n_block;++i) new_b[i]=b[i];
262
263 if (A->col_coupleBlock->pattern->ptr!=NULL){
264 Paso_SystemMatrix_startCollect(A,x);
265 remote_x=Paso_SystemMatrix_finishCollect(A);
266 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
267 }
268
269 Paso_Solver_GS_local(A,gs,x,new_b);
270 sweeps --;
271 }
272 MEMFREE(new_b);
273 }
274
275 return;
276 }
277
/* Locally solve A'x=b, where A' is the mainBlock of global system matrix A.

   Performs one symmetric Gauss-Seidel sweep: a forward pass over the rows,
   x{k} = D{-1}(b - E*x{k} - F*x{k-1}), followed by a backward pass in
   reverse row order.  The INVERSE of each diagonal block is read from
   gs->diag (precomputed in Paso_Solver_getGSMPI), so only multiplications
   are performed here.  Block sizes 1, 2 and 3 are supported.

   With OpenMP and more than one thread, the rows are split into nt
   contiguous chunks.  A pre-phase folds couplings to rows OUTSIDE a
   thread's own chunk into b (using the x values of the previous sweep);
   the forward/backward passes then only traverse couplings INSIDE the
   chunk.  NOTE(review): this pre-phase overwrites the caller's b in place
   when nt > 1 -- callers must treat b as scratch; verify call sites. */
void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
  dim_t n_block=gs->n_block;
  dim_t n=gs->n;
  double sum0, sum1, sum2, X0, X1, X2;
  double *val=A->mainBlock->val;   /* CSR values of the local main block */
  double *diag=gs->diag;           /* precomputed inverses of the diagonal blocks */
  index_t *ptr=gs->pattern->ptr;
  index_t *index=gs->pattern->index;
  dim_t i, j, iptr, xi, ai, xj, aj;
#ifdef _OPENMP
  dim_t nt, len, rest, t, istart, iend;

  /* split n rows into nt chunks of len rows; the first 'rest' chunks get
     one extra row.  NOTE(review): the reverse loops below (t>=0, i>=istart)
     assume dim_t is a signed type -- verify. */
  nt=omp_get_max_threads();
  len=n/nt;
  rest=n-len*nt;
#endif
  /* TO BE DONE: add handler to deal with the case "n" is too small
     to be worth run in threads. */

#ifdef _OPENMP
  /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are x values
     computed by other threads in previous sweep */
  if (nt > 1) {
     if (n_block == 1){
        #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
        for (t=0; t<nt; t++) {
           istart=len*t+MIN(t,rest);
           iend=istart+len+(t<rest ? 1:0);
           for (i=istart; i<iend; i++){
              sum0=b[i];
              for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                 j=index[iptr];
                 /* only couplings leaving this thread's chunk */
                 if (j<istart || j>=iend){
                    sum0 = sum0 - val[iptr] * x[j];
                 }
              }
              b[i]=sum0;
           }
        }
     } else if (n_block == 2) {
        #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
        for (t=0; t<nt; t++) {
           istart=len*t+MIN(t,rest);
           iend=istart+len+(t<rest ? 1:0);
           for (i=istart; i<iend; i++){
              xi=2*i;       /* start of row i in the vectors */
              sum0=b[xi];
              sum1=b[xi+1];
              for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                 j=index[iptr];
                 if (j<istart || j>=iend){
                    xj=2*j;
                    aj=4*iptr;   /* start of the 2x2 block (column-major) */
                    X0=x[xj];
                    X1=x[xj+1];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
                 }
              }
              b[xi]=sum0;
              b[xi+1]=sum1;
           }
        }
     } else if (n_block == 3) {
        #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
        for (t=0; t<nt; t++) {
           istart=len*t+MIN(t,rest);
           iend=istart+len+(t<rest ? 1:0);
           for (i=istart; i<iend; i++){
              xi=3*i;
              sum0=b[xi];
              sum1=b[xi+1];
              sum2=b[xi+2];
              for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                 j=index[iptr];
                 if (j<istart || j>=iend){
                    xj=3*j;
                    aj=9*iptr;   /* start of the 3x3 block (column-major) */
                    X0=x[xj];
                    X1=x[xj+1];
                    X2=x[xj+2];
                    sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
                    sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
                    sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
                 }
              }
              b[xi]=sum0;
              b[xi+1]=sum1;
              b[xi+2]=sum2;
           }
        }
     }
  }
#endif

  /* step1: forward iteration
     x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
  /* One Gauss-Seidel iteration
     In case of forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1})
     => into a loop (without coloring):
        for i in [0,n-1] do
           x_i = (1/a_ii) *
                 (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
     where the first "\sum" sums up newly updated values of x elements
     while the second "\sum" sums up previous (old) values of x elements.
     In case of backward iteration x{k} = D{-1}(b - F*x{k} - E*x{k-1})
   */
  if (n_block == 1){
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
     for (t=0; t<nt; t++) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=istart; i<iend; i++){
#else
     for (i=0; i<n; i++) {
#endif
        sum0 = b[i];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
           /* with OpenMP only in-chunk couplings count: the out-of-chunk
              ones were folded into b by the pre-phase above */
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              sum0 = sum0 - val[iptr] * x[j];
           }
        }
        x[i] = sum0*diag[i];
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  } else if (n_block == 2) {
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
     for (t=0; t<nt; t++) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=istart; i<iend; i++){
#else
     for (i=0; i<n; i++) {
#endif
        xi=2*i;
        ai=4*i;   /* start of the inverted 2x2 diagonal block for row i */
        sum0 = b[xi];
        sum1 = b[xi+1];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              xj=2*j;
              aj=4*iptr;
              X0=x[xj];
              X1=x[xj+1];
              sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
              sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
           }
        }
        /* x_i = D{-1} * sum, using the precomputed inverse */
        x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
        x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  } else if (n_block == 3) {
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
     for (t=0; t<nt; t++) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=istart; i<iend; i++){
#else
     for (i=0; i<n; i++) {
#endif
        xi=3*i;
        ai=9*i;   /* start of the inverted 3x3 diagonal block for row i */
        sum0 = b[xi];
        sum1 = b[xi+1];
        sum2 = b[xi+2];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              xj=3*j;
              aj=9*iptr;
              X0=x[xj];
              X1=x[xj+1];
              X2=x[xj+2];
              sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
              sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
              sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
           }
        }
        x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
        x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
        x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  }

  /* step2: backward iteration
     x{k} = D{-1}(b - F*x{k} - E*x{k-1})
     -- same structure as the forward pass, rows traversed in reverse order */
  if (n_block == 1){
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
     for (t=nt-1; t>=0; t--) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=iend-1; i>=istart; i--){
#else
     for (i=n-1; i>=0; i--) {
#endif
        sum0 = b[i];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              sum0 = sum0 - val[iptr] * x[j];
           }
        }
        x[i] = sum0*diag[i];
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  } else if (n_block == 2) {
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
     for (t=nt-1; t>=0; t--) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=iend-1; i>=istart; i--){
#else
     for (i=n-1; i>=0; i--) {
#endif
        xi=2*i;
        ai=4*i;
        sum0 = b[xi];
        sum1 = b[xi+1];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              xj=2*j;
              aj=4*iptr;
              X0=x[xj];
              X1=x[xj+1];
              sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
              sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
           }
        }
        x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
        x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  } else if (n_block == 3) {
#ifdef _OPENMP
     #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
     for (t=nt-1; t>=0; t--) {
        istart=len*t+MIN(t,rest);
        iend=istart+len+(t<rest ? 1:0);
        for (i=iend-1; i>=istart; i--){
#else
     for (i=n-1; i>=0; i--) {
#endif
        xi=3*i;
        ai=9*i;
        sum0 = b[xi];
        sum1 = b[xi+1];
        sum2 = b[xi+2];
        for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
           j=index[iptr];
#ifdef _OPENMP
           if (j >= istart && j < iend && i != j){
#else
           if (i != j) {
#endif
              xj=3*j;
              aj=9*iptr;
              X0=x[xj];
              X1=x[xj+1];
              X2=x[xj+2];
              sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
              sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
              sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
           }
        }
        x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
        x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
        x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
     }
     }
#else
     }
#endif
  }

  return;
}
606

  ViewVC Help
Powered by ViewVC 1.1.26