/[escript]/trunk/paso/src/GSMPI.c
ViewVC logotype

Contents of /trunk/paso/src/GSMPI.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 5 months ago) by caltinay
File MIME type: text/plain
File size: 22211 byte(s)
Assorted spelling fixes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /************************************************************************************/
18
19 /* Paso: GS preconditioner with reordering */
20
21 /************************************************************************************/
22
23 /* Author: l.gao@uq.edu.au */
24
25 /************************************************************************************/
26
27 #include "Paso.h"
28 #include "SystemMatrix.h"
29 #include "Solver.h"
30 #include "PasoUtil.h"
31
32 #include <stdio.h>
33
34 /************************************************************************************/
35
36 /* free all memory used by GS */
37
38 void Paso_Solver_GSMPI_free(Paso_Solver_GS * in) {
39 if (in!=NULL) {
40 MEMFREE(in->colorOf);
41 Paso_SparseMatrix_free(in->factors);
42 MEMFREE(in->diag);
43 MEMFREE(in->main_iptr);
44 Paso_Pattern_free(in->pattern);
45 MEMFREE(in);
46 }
47 }
48
49 ==========================================================================
50 /************************************************************************************/
51
52 /* gs->diag saves the matrix of D{-1}
53 This is different from Paso_Solver_getGS(), in which, gs->diag
54 is the matrix D.
55 */
56 Paso_Solver_GS* Paso_Solver_getGSMPI(Paso_SparseMatrix * A,bool_t verbose) {
57 dim_t n=A->numRows;
58 dim_t n_block=A->row_block_size;
59 dim_t block_size=A->block_size;
60 register index_t i,iptr_main=0,iPtr;
61 double time0=0,time_color=0,time_fac=0;
62 double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;
63
64 /* allocations: */
65 /* printf("n_block= %d, n= %d\n", n_block, n); */
66 Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
67 if (Paso_checkPtr(out)) return NULL;
68 out->colorOf=MEMALLOC(n,index_t);
69 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
70 /*out->diag=MEMALLOC(A->len,double);*/
71 out->main_iptr=MEMALLOC(n,index_t);
72 out->pattern=Paso_Pattern_getReference(A->pattern);
73 out->factors=Paso_SparseMatrix_getReference(A);
74 out->n_block=n_block;
75 out->n=n;
76
77 if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
78 time0=Paso_timer();
79 Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
80 time_color=Paso_timer()-time0;
81
82 if (Paso_noError()) {
83 time0=Paso_timer();
84
85 if (! (Paso_checkPtr(out->diag))) {
86 if (n_block==1) {
87 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
88 for (i = 0; i < A->pattern->numOutput; i++) {
89 iptr_main=0;
90 out->diag[i]=1.;
91 /* find main diagonal */
92 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
93 if (A->pattern->index[iPtr]==i) {
94 iptr_main=iPtr;
95 if (ABS(A->val[iPtr]) > 0.) {
96 out->diag[i]=1./(A->val[iPtr]);
97 } else {
98 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
99 }
100 break;
101 }
102 }
103 out->main_iptr[i]=iptr_main;
104 }
105 } else if (n_block==2) {
106 #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
107 for (i = 0; i < A->pattern->numOutput; i++) {
108 out->diag[i*4+0]= 1.;
109 out->diag[i*4+1]= 0.;
110 out->diag[i*4+2]= 0.;
111 out->diag[i*4+3]= 1.;
112 iptr_main=0;
113 /* find main diagonal */
114 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
115 if (A->pattern->index[iPtr]==i) {
116 iptr_main=iPtr;
117 A11=A->val[iPtr*4];
118 A21=A->val[iPtr*4+1];
119 A12=A->val[iPtr*4+2];
120 A22=A->val[iPtr*4+3];
121 D = A11*A22-A12*A21;
122 if (ABS(D)>0.) {
123 D=1./D;
124 out->diag[i*4 ]= A22*D;
125 out->diag[i*4+1]= -A21*D;
126 out->diag[i*4+2]= -A12*D;
127 out->diag[i*4+3]= A11*D;
128 } else {
129 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
130 }
131 break;
132 }
133 }
134 out->main_iptr[i]=iptr_main;
135 }
136 } else if (n_block==3) {
137 #pragma omp parallel for private(i, iPtr,iptr_main) schedule(static)
138 for (i = 0; i < A->pattern->numOutput; i++) {
139 out->diag[i*9 ]=1.;
140 out->diag[i*9+1]=0.;
141 out->diag[i*9+2]=0.;
142 out->diag[i*9+3]=0.;
143 out->diag[i*9+4]=1.;
144 out->diag[i*9+5]=0.;
145 out->diag[i*9+6]=0.;
146 out->diag[i*9+7]=0.;
147 out->diag[i*9+8]=1.;
148 iptr_main=0;
149 /* find main diagonal */
150 for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
151 if (A->pattern->index[iPtr]==i) {
152 iptr_main=iPtr;
153 A11=A->val[iPtr*9 ];
154 A21=A->val[iPtr*9+1];
155 A31=A->val[iPtr*9+2];
156 A12=A->val[iPtr*9+3];
157 A22=A->val[iPtr*9+4];
158 A32=A->val[iPtr*9+5];
159 A13=A->val[iPtr*9+6];
160 A23=A->val[iPtr*9+7];
161 A33=A->val[iPtr*9+8];
162 D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
163 if (ABS(D)>0.) {
164 D=1./D;
165 out->diag[i*9 ]= (A22*A33-A23*A32)*D;
166 out->diag[i*9+1]= (A31*A23-A21*A33)*D;
167 out->diag[i*9+2]= (A21*A32-A31*A22)*D;
168 out->diag[i*9+3]= (A13*A32-A12*A33)*D;
169 out->diag[i*9+4]= (A11*A33-A31*A13)*D;
170 out->diag[i*9+5]= (A12*A31-A11*A32)*D;
171 out->diag[i*9+6]= (A12*A23-A13*A22)*D;
172 out->diag[i*9+7]= (A13*A21-A11*A23)*D;
173 out->diag[i*9+8]= (A11*A22-A12*A21)*D;
174 } else {
175 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
176 }
177 break;
178 }
179 }
180 out->main_iptr[i]=iptr_main;
181 }
182 }
183 }
184
185 time_fac=Paso_timer()-time0;
186 }
187 }
188 if (Paso_noError()) {
189 if (verbose) {
190 printf("GS_MPI: %d color used \n",out->num_colors);
191 printf("timing: GS_MPI: coloring/elimination : %e/%e\n",time_color,time_fac);
192 }
193 return out;
194 } else {
195 Paso_Solver_GSMPI_free(out);
196 return NULL;
197 }
198 }
199
200 void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b);
201
202 /************************************************************************************/
203
204 /* Applies MPI versioned GS
205
206 In fact it solves Ax=b in two steps:
207 step 1: among different nodes (MPI ranks), we use block Jacobi
208 x{k} = x{k-1} + D{-1}(b-A*x{k-1})
209 => D*x{k} = b - (E+F)x{k-1}
210 where matrix D is (let p be the number of nodes):
211 --------------------
212 |A1| | | ... | |
213 --------------------
214 | |A2| | ... | |
215 --------------------
216 | | |A3| ... | |
217 --------------------
218 | ... |
219 --------------------
220 | | | | ... |Ap|
221 --------------------
222 and Ai (i \in [1,p]) represents the mainBlock of matrix
223 A on node i. Matrix (E+F) is represented as the coupleBlock
224 of matrix A on each node (annotated as ACi).
225 Therefore, step 1 can be turned into the following for node i:
226 => Ai * x{k} = b - ACi * x{k-1}
227 where both x{k} and b are the segment of x and b on node i,
228 and x{k-1} is the old segment values of x on all other nodes.
229
230 step 2: inside node i, we use Gauss-Seidel
231 let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
232 by using symmetric Gauss-Seidel, this can be solved in a forward
233 phase and a backward phase:
234 forward phase: x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
235 backward phase: x{m+1} = diag(Ai){-1} (b' - F*{m+1} - E*x{m})
236 */
237
238 void Paso_Solver_solveGSMPI(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
239 register dim_t i;
240 dim_t n_block=gs->n_block;
241 dim_t n=gs->n;
242 dim_t sweeps=gs->sweeps;
243
244 /*xi{0} = 0
245 xi{1} = Ai{-1} * bi
246 xi{2} = Ai{-1} * (bi - ACi * xj{1})
247 ...
248 xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
249 #pragma omp parallel for private(i) schedule(static)
250 for (i=0;i<n*n_block;++i) x[i]=0;
251
252 Paso_Solver_GS_local(A,gs,x,b);
253
254 if (sweeps > 1) {
255 double *new_b=MEMALLOC(n*n_block,double);
256 double *remote_x=NULL;
257
258 while (sweeps > 1) {
259 /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are remote
260 values of x, which requires MPI communication */
261 #pragma omp parallel for private(i) schedule(static)
262 for (i=0;i<n*n_block;++i) new_b[i]=b[i];
263
264 if (A->col_coupleBlock->pattern->ptr!=NULL){
265 Paso_SystemMatrix_startCollect(A,x);
266 remote_x=Paso_SystemMatrix_finishCollect(A);
267 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
268 }
269
270 Paso_Solver_GS_local(A,gs,x,new_b);
271 sweeps --;
272 }
273 MEMFREE(new_b);
274 }
275
276 return;
277 }
278
279 /* Locally solve A'x=b, where A' is the mainBlock of global system matrix A */
280 void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
281 dim_t n_block=gs->n_block;
282 dim_t n=gs->n;
283 double sum0, sum1, sum2, X0, X1, X2;
284 double *val=A->mainBlock->val;
285 double *diag=gs->diag;
286 index_t *ptr=gs->pattern->ptr;
287 index_t *index=gs->pattern->index;
288 dim_t i, j, iptr, xi, ai, xj, aj;
289 #ifdef _OPENMP
290 dim_t nt, len, rest, t, istart, iend;
291
292 nt=omp_get_max_threads();
293 len=n/nt;
294 rest=n-len*nt;
295 #endif
296 /* TO BE DONE: add handler to deal with the case "n is too small"
297 to be worth run in threads. */
298
299 #ifdef _OPENMP
300 /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are x values
301 computed by other threads in previous sweep */
302 if (nt > 1) {
303 if (n_block == 1){
304 #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
305 for (t=0; t<nt; t++) {
306 istart=len*t+MIN(t,rest);
307 iend=istart+len+(t<rest ? 1:0);
308 for (i=istart; i<iend; i++){
309 sum0=b[i];
310 for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
311 j=index[iptr];
312 if (j<istart || j>=iend){
313 sum0 = sum0 - val[iptr] * x[j];
314 }
315 }
316 b[i]=sum0;
317 }
318 }
319 } else if (n_block == 2) {
320 #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
321 for (t=0; t<nt; t++) {
322 istart=len*t+MIN(t,rest);
323 iend=istart+len+(t<rest ? 1:0);
324 for (i=istart; i<iend; i++){
325 xi=2*i;
326 sum0=b[xi];
327 sum1=b[xi+1];
328 for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
329 j=index[iptr];
330 if (j<istart || j>=iend){
331 xj=2*j;
332 aj=4*iptr;
333 X0=x[xj];
334 X1=x[xj+1];
335 sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
336 sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
337 }
338 }
339 b[xi]=sum0;
340 b[xi+1]=sum1;
341 }
342 }
343 } else if (n_block == 3) {
344 #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
345 for (t=0; t<nt; t++) {
346 istart=len*t+MIN(t,rest);
347 iend=istart+len+(t<rest ? 1:0);
348 for (i=istart; i<iend; i++){
349 xi=3*i;
350 sum0=b[xi];
351 sum1=b[xi+1];
352 sum2=b[xi+2];
353 for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
354 j=index[iptr];
355 if (j<istart || j>=iend){
356 xj=3*j;
357 aj=9*iptr;
358 X0=x[xj];
359 X1=x[xj+1];
360 X2=x[xj+2];
361 sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
362 sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
363 sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
364 }
365 }
366 b[xi]=sum0;
367 b[xi+1]=sum1;
368 b[xi+2]=sum2;
369 }
370 }
371 }
372 }
373 #endif
374
375 /* step 1: forward iteration
376 x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
377 /* One Gauss-Seidel iteration
378 In case of forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1})
379 => into a loop (without coloring):
380 for i in [0,n-1] do
381 x_i = (1/a_ii) *
382 (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
383 where the first "\sum" sums up newly updated values of x elements
384 while the second "\sum" sums up previous (old) values of x elements.
385 In case of backward iteration x{k} = D{-1}(b - F*x{k} - E*x{k-1})
386 */
387 if (n_block == 1){
388 #ifdef _OPENMP
389 #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
390 for (t=0; t<nt; t++) {
391 istart=len*t+MIN(t,rest);
392 iend=istart+len+(t<rest ? 1:0);
393 for (i=istart; i<iend; i++){
394 #else
395 for (i=0; i<n; i++) {
396 #endif
397 sum0 = b[i];
398 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
399 j=index[iptr];
400 #ifdef _OPENMP
401 if (j >= istart && j < iend && i != j){
402 #else
403 if (i != j) {
404 #endif
405 sum0 = sum0 - val[iptr] * x[j];
406 }
407 }
408 x[i] = sum0*diag[i];
409 #ifdef _OPENMP
410 }
411 }
412 #else
413 }
414 #endif
415 } else if (n_block == 2) {
416 #ifdef _OPENMP
417 #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
418 for (t=0; t<nt; t++) {
419 istart=len*t+MIN(t,rest);
420 iend=istart+len+(t<rest ? 1:0);
421 for (i=istart; i<iend; i++){
422 #else
423 for (i=0; i<n; i++) {
424 #endif
425 xi=2*i;
426 ai=4*i;
427 sum0 = b[xi];
428 sum1 = b[xi+1];
429 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
430 j=index[iptr];
431 #ifdef _OPENMP
432 if (j >= istart && j < iend && i != j){
433 #else
434 if (i != j) {
435 #endif
436 xj=2*j;
437 aj=4*iptr;
438 X0=x[xj];
439 X1=x[xj+1];
440 sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
441 sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
442 }
443 }
444 x[xi ]=diag[ai ]*sum0 + diag[ai+2]*sum1;
445 x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
446 #ifdef _OPENMP
447 }
448 }
449 #else
450 }
451 #endif
452 } else if (n_block == 3) {
453 #ifdef _OPENMP
454 #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
455 for (t=0; t<nt; t++) {
456 istart=len*t+MIN(t,rest);
457 iend=istart+len+(t<rest ? 1:0);
458 for (i=istart; i<iend; i++){
459 #else
460 for (i=0; i<n; i++) {
461 #endif
462 xi=3*i;
463 ai=9*i;
464 sum0 = b[xi];
465 sum1 = b[xi+1];
466 sum2 = b[xi+2];
467 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
468 j=index[iptr];
469 #ifdef _OPENMP
470 if (j >= istart && j < iend && i != j){
471 #else
472 if (i != j) {
473 #endif
474 xj=3*j;
475 aj=9*iptr;
476 X0=x[xj];
477 X1=x[xj+1];
478 X2=x[xj+2];
479 sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
480 sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
481 sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
482 }
483 }
484 x[xi ] = diag[ai ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
485 x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
486 x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
487 #ifdef _OPENMP
488 }
489 }
490 #else
491 }
492 #endif
493 }
494
495 /* step 2: backward iteration
496 x{k} = D{-1}(b - F*x{k} - E*x{k-1}) */
497 if (n_block == 1){
498 #ifdef _OPENMP
499 #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
500 for (t=nt-1; t>=0; t--) {
501 istart=len*t+MIN(t,rest);
502 iend=istart+len+(t<rest ? 1:0);
503 for (i=iend-1; i>=istart; i--){
504 #else
505 for (i=n-1; i>=0; i--) {
506 #endif
507 sum0 = b[i];
508 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
509 j=index[iptr];
510 #ifdef _OPENMP
511 if (j >= istart && j < iend && i != j){
512 #else
513 if (i != j) {
514 #endif
515 sum0 = sum0 - val[iptr] * x[j];
516 }
517 }
518 x[i] = sum0*diag[i];
519 #ifdef _OPENMP
520 }
521 }
522 #else
523 }
524 #endif
525 } else if (n_block == 2) {
526 #ifdef _OPENMP
527 #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
528 for (t=nt-1; t>=0; t--) {
529 istart=len*t+MIN(t,rest);
530 iend=istart+len+(t<rest ? 1:0);
531 for (i=iend-1; i>=istart; i--){
532 #else
533 for (i=n-1; i>=0; i--) {
534 #endif
535 xi=2*i;
536 ai=4*i;
537 sum0 = b[xi];
538 sum1 = b[xi+1];
539 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
540 j=index[iptr];
541 #ifdef _OPENMP
542 if (j >= istart && j < iend && i != j){
543 #else
544 if (i != j) {
545 #endif
546 xj=2*j;
547 aj=4*iptr;
548 X0=x[xj];
549 X1=x[xj+1];
550 sum0 = sum0 - val[aj ]*X0 - val[aj+2]*X1;
551 sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
552 }
553 }
554 x[xi ]=diag[ai ]*sum0 + diag[ai+2]*sum1;
555 x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
556 #ifdef _OPENMP
557 }
558 }
559 #else
560 }
561 #endif
562 } else if (n_block == 3) {
563 #ifdef _OPENMP
564 #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
565 for (t=nt-1; t>=0; t--) {
566 istart=len*t+MIN(t,rest);
567 iend=istart+len+(t<rest ? 1:0);
568 for (i=iend-1; i>=istart; i--){
569 #else
570 for (i=n-1; i>=0; i--) {
571 #endif
572 xi=3*i;
573 ai=9*i;
574 sum0 = b[xi];
575 sum1 = b[xi+1];
576 sum2 = b[xi+2];
577 for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
578 j=index[iptr];
579 #ifdef _OPENMP
580 if (j >= istart && j < iend && i != j){
581 #else
582 if (i != j) {
583 #endif
584 xj=3*j;
585 aj=9*iptr;
586 X0=x[xj];
587 X1=x[xj+1];
588 X2=x[xj+2];
589 sum0 = sum0 - val[aj ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
590 sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
591 sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
592 }
593 }
594 x[xi ] = diag[ai ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
595 x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
596 x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
597 #ifdef _OPENMP
598 }
599 }
600 #else
601 }
602 #endif
603 }
604 }
605

  ViewVC Help
Powered by ViewVC 1.1.26