/trunk/paso/src/Solver_GSMPI.c



Revision 3051
Tue Jun 29 00:45:49 2010 UTC by lgao
File MIME type: text/plain
File size: 21965 byte(s)

A hybrid MPI/OpenMP version of the Gauss-Seidel preconditioner has been added.
To use it, select "SolverOptions.GAUSS_SEIDEL_MPI" in the python script.
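A minimal sketch of how this option might be selected from an escript python script follows. The LinearPDE setup and the getSolverOptions()/setPreconditioner() calls are assumptions about the escript SolverOptions interface, not part of this revision; only the constant name GAUSS_SEIDEL_MPI comes from the commit message:

    # sketch only -- the solver-option calls are assumed, not taken from this file
    from esys.escript.linearPDEs import LinearPDE, SolverOptions

    pde = LinearPDE(domain)   # 'domain' stands for an existing escript domain
    pde.getSolverOptions().setPreconditioner(SolverOptions.GAUSS_SEIDEL_MPI)
    u = pde.getSolution()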



/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Paso: GS preconditioner with reordering */

/**************************************************************/

/* Copyright ACcESS Australia 2003-2010 */
/* Author: l.gao@uq.edu.au */

/**************************************************************/

#include "Paso.h"
#include "SystemMatrix.h"
#include "Solver.h"
#include "PasoUtil.h"

#include <stdio.h>

/**************************************************************/

/* free all memory used by GS */

void Paso_Solver_GSMPI_free(Paso_Solver_GS * in) {
   if (in!=NULL) {
      MEMFREE(in->colorOf);
      Paso_SparseMatrix_free(in->factors);
      MEMFREE(in->diag);
      MEMFREE(in->main_iptr);
      Paso_Pattern_free(in->pattern);
      MEMFREE(in);
   }
}

/**************************************************************/

/* gs->diag stores the matrix D^{-1};
   this differs from Paso_Solver_getGS(), in which gs->diag
   stores the matrix D.
*/
Paso_Solver_GS* Paso_Solver_getGSMPI(Paso_SparseMatrix * A,bool_t verbose) {
   dim_t n=A->numRows;
   dim_t n_block=A->row_block_size;
   dim_t block_size=A->block_size;
   register index_t i,iptr_main=0,iPtr;
   double time0=0,time_color=0,time_fac=0;
   double D, A11, A21, A31, A12, A22, A32, A13, A23, A33;

   /* allocations: */
   /* printf("n_block= %d, n= %d\n", n_block, n); */
   Paso_Solver_GS* out=MEMALLOC(1,Paso_Solver_GS);
   if (Paso_checkPtr(out)) return NULL;
   out->colorOf=MEMALLOC(n,index_t);
   out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
   /*out->diag=MEMALLOC(A->len,double);*/
   out->main_iptr=MEMALLOC(n,index_t);
   out->pattern=Paso_Pattern_getReference(A->pattern);
   out->factors=Paso_SparseMatrix_getReference(A);
   out->n_block=n_block;
   out->n=n;

   if ( !(Paso_checkPtr(out->colorOf) || Paso_checkPtr(out->main_iptr) || Paso_checkPtr(out->factors)) ) {
      time0=Paso_timer();
      Paso_Pattern_color(A->pattern,&out->num_colors,out->colorOf);
      time_color=Paso_timer()-time0;

      if (Paso_noError()) {
         time0=Paso_timer();

         if (! (Paso_checkPtr(out->diag))) {
            if (n_block==1) {
               #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
               for (i = 0; i < A->pattern->numOutput; i++) {
                  iptr_main=0;
                  out->diag[i]=1.;
                  /* find main diagonal */
                  for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                     if (A->pattern->index[iPtr]==i) {
                        iptr_main=iPtr;
                        if (ABS(A->val[iPtr]) > 0.) {
                           out->diag[i]=1./(A->val[iPtr]);
                        } else {
                           Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                        }
                        break;
                     }
                  }
                  out->main_iptr[i]=iptr_main;
               }
            } else if (n_block==2) {
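               /* 2x2 blocks are stored column-major: val[4*iPtr] holds A11,
                  val[4*iPtr+1] A21, val[4*iPtr+2] A12, val[4*iPtr+3] A22;
                  the block inverse below follows the cofactor formula.
                  Illustrative example (not from the original source): for the
                  block [2 1; 1 3], D = 2*3 - 1*1 = 5, so the stored inverse
                  is [0.6 -0.2; -0.2 0.4], again column-major. */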
               #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
               for (i = 0; i < A->pattern->numOutput; i++) {
                  out->diag[i*4+0]= 1.;
                  out->diag[i*4+1]= 0.;
                  out->diag[i*4+2]= 0.;
                  out->diag[i*4+3]= 1.;
                  iptr_main=0;
                  /* find main diagonal */
                  for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                     if (A->pattern->index[iPtr]==i) {
                        iptr_main=iPtr;
                        A11=A->val[iPtr*4];
                        A21=A->val[iPtr*4+1];
                        A12=A->val[iPtr*4+2];
                        A22=A->val[iPtr*4+3];
                        D = A11*A22-A12*A21;
                        if (ABS(D)>0.) {
                           D=1./D;
                           out->diag[i*4  ]=  A22*D;
                           out->diag[i*4+1]= -A21*D;
                           out->diag[i*4+2]= -A12*D;
                           out->diag[i*4+3]=  A11*D;
                        } else {
                           Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                        }
                        break;
                     }
                  }
                  out->main_iptr[i]=iptr_main;
               }
            } else if (n_block==3) {
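               /* 3x3 blocks use the same column-major layout; the inverse is
                  built as the adjugate (transposed cofactor matrix) times 1/D */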
               #pragma omp parallel for private(i,iPtr,iptr_main) schedule(static)
               for (i = 0; i < A->pattern->numOutput; i++) {
                  out->diag[i*9  ]=1.;
                  out->diag[i*9+1]=0.;
                  out->diag[i*9+2]=0.;
                  out->diag[i*9+3]=0.;
                  out->diag[i*9+4]=1.;
                  out->diag[i*9+5]=0.;
                  out->diag[i*9+6]=0.;
                  out->diag[i*9+7]=0.;
                  out->diag[i*9+8]=1.;
                  iptr_main=0;
                  /* find main diagonal */
                  for (iPtr = A->pattern->ptr[i]; iPtr < A->pattern->ptr[i + 1]; iPtr++) {
                     if (A->pattern->index[iPtr]==i) {
                        iptr_main=iPtr;
                        A11=A->val[iPtr*9  ];
                        A21=A->val[iPtr*9+1];
                        A31=A->val[iPtr*9+2];
                        A12=A->val[iPtr*9+3];
                        A22=A->val[iPtr*9+4];
                        A32=A->val[iPtr*9+5];
                        A13=A->val[iPtr*9+6];
                        A23=A->val[iPtr*9+7];
                        A33=A->val[iPtr*9+8];
                        D = A11*(A22*A33-A23*A32) + A12*(A31*A23-A21*A33) + A13*(A21*A32-A31*A22);
                        if (ABS(D)>0.) {
                           D=1./D;
                           out->diag[i*9  ]= (A22*A33-A23*A32)*D;
                           out->diag[i*9+1]= (A31*A23-A21*A33)*D;
                           out->diag[i*9+2]= (A21*A32-A31*A22)*D;
                           out->diag[i*9+3]= (A13*A32-A12*A33)*D;
                           out->diag[i*9+4]= (A11*A33-A31*A13)*D;
                           out->diag[i*9+5]= (A12*A31-A11*A32)*D;
                           out->diag[i*9+6]= (A12*A23-A13*A22)*D;
                           out->diag[i*9+7]= (A13*A21-A11*A23)*D;
                           out->diag[i*9+8]= (A11*A22-A12*A21)*D;
                        } else {
                           Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getGSMPI: non-regular main diagonal block.");
                        }
                        break;
                     }
                  }
                  out->main_iptr[i]=iptr_main;
               }
            }
         }

         time_fac=Paso_timer()-time0;
      }
   }
   if (Paso_noError()) {
      if (verbose) {
         printf("GS_MPI: %d colors used\n",out->num_colors);
         printf("timing: GS_MPI: coloring/elimination : %e/%e\n",time_color,time_fac);
      }
      return out;
   } else {
      Paso_Solver_GSMPI_free(out);
      return NULL;
   }
}

void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b);

/**************************************************************/

201
202 /* apply MPI versioned GS
203
204 in fact it solves Ax=b in two steps:
205 step1: among different nodes (MPI ranks), we use block Jacobi
206 x{k} = x{k-1} + D{-1}(b-A*x{k-1})
207 => D*x{k} = b - (E+F)x{k-1}
208 where matrix D is (let p be the number of nodes):
209 --------------------
210 |A1| | | ... | |
211 --------------------
212 | |A2| | ... | |
213 --------------------
214 | | |A3| ... | |
215 --------------------
216 | ... |
217 --------------------
218 | | | | ... |Ap|
219 --------------------
220 and Ai (i \in [1,p]) represents the mainBlock of matrix
221 A on node i. Matrix (E+F) is represented as the coupleBlock
222 of matrix A on each node (annotated as ACi).
223 Therefore, step1 can be turned into the following for node i:
224 => Ai * x{k} = b - ACi * x{k-1}
225 where both x{k} and b are the segment of x and b on node i,
226 and x{k-1} is the old segment values of x on all other nodes.
227
228 step2: inside node i, we use Gauss-Seidel
229 let b'= b - ACi * x{k-1} we have Ai * x{k} = b' for node i
230 by using symetrix Gauss-Seidel, this can be solved in a forward
231 phase and a backward phase:
232 forward phase: x{m} = diag(Ai){-1} (b' - E*x{m} - F*x{m-1})
233 backward phase: x{m+1} = diag(Ai){-1} (b' - F*{m+1} - E*x{m})
234 */
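
/* illustration (not from the original source): with p=2 ranks, the sweep-k
   update on rank 1 reads A1 * x1{k} = b1 - AC1 * x2{k-1}, where x2{k-1} is
   the copy of rank 2's segment of x gathered via
   Paso_SystemMatrix_startCollect/Paso_SystemMatrix_finishCollect below */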

void Paso_Solver_solveGSMPI(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
   register dim_t i;
   dim_t n_block=gs->n_block;
   dim_t n=gs->n;
   dim_t sweeps=gs->sweeps;

   /* xi{0} = 0
      xi{1} = Ai{-1} * bi
      xi{2} = Ai{-1} * (bi - ACi * xj{1})
      ...
      xi{k} = Ai{-1} * (bi - ACi * xj{k-1}) */
   #pragma omp parallel for private(i) schedule(static)
   for (i=0;i<n*n_block;++i) x[i]=0;

   Paso_Solver_GS_local(A,gs,x,b);

   if (sweeps > 1) {
      double *new_b=MEMALLOC(n*n_block,double);
      double *remote_x=NULL;

      while (sweeps > 1) {
         /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are the remote
            values of x, which requires MPI communication */
         #pragma omp parallel for private(i) schedule(static)
         for (i=0;i<n*n_block;++i) new_b[i]=b[i];

         if (A->col_coupleBlock->pattern->ptr!=NULL){
            Paso_SystemMatrix_startCollect(A,x);
            remote_x=Paso_SystemMatrix_finishCollect(A);
            Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1),A->col_coupleBlock,remote_x,DBLE(1), new_b);
         }

         Paso_Solver_GS_local(A,gs,x,new_b);
         sweeps--;
      }
      MEMFREE(new_b);
   }

   return;
}

/* Locally solve A'x=b, where A' is the mainBlock of global system matrix A */
void Paso_Solver_GS_local(Paso_SystemMatrix* A, Paso_Solver_GS * gs, double * x, double * b) {
   dim_t n_block=gs->n_block;
   dim_t n=gs->n;
   double sum0, sum1, sum2, X0, X1, X2;
   double *val=A->mainBlock->val;
   double *diag=gs->diag;
   index_t *ptr=gs->pattern->ptr;
   index_t *index=gs->pattern->index;
   dim_t i, j, iptr, xi, ai, xj, aj;
#ifdef _OPENMP
   dim_t nt, len, rest, t, istart, iend;

   nt=omp_get_max_threads();
   len=n/nt;
   rest=n-len*nt;
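   /* static row partition: thread t owns the contiguous chunk
      [len*t+MIN(t,rest), len*t+MIN(t,rest)+len+(t<rest ? 1:0)),
      i.e. the first "rest" threads each take one extra row */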
#endif
   /* TO BE DONE: add a handler for the case where "n" is too small
      to be worth running in threads. */

#ifdef _OPENMP
   /* calculate new_b = b - ACi * x{k-1}, where x{k-1} are the x values
      computed by other threads in the previous sweep */
   if (nt > 1) {
      if (n_block == 1){
         #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
         for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
               sum0=b[i];
               for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                  j=index[iptr];
                  if (j<istart || j>=iend){
                     sum0 = sum0 - val[iptr] * x[j];
                  }
               }
               b[i]=sum0;
            }
         }
      } else if (n_block == 2) {
         #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
         for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
               xi=2*i;
               sum0=b[xi];
               sum1=b[xi+1];
               for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                  j=index[iptr];
                  if (j<istart || j>=iend){
                     xj=2*j;
                     aj=4*iptr;
                     X0=x[xj];
                     X1=x[xj+1];
                     sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
                     sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
                  }
               }
               b[xi]=sum0;
               b[xi+1]=sum1;
            }
         }
      } else if (n_block == 3) {
         #pragma omp parallel for private(t,istart,iend,i,xi,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
         for (t=0; t<nt; t++) {
            istart=len*t+MIN(t,rest);
            iend=istart+len+(t<rest ? 1:0);
            for (i=istart; i<iend; i++){
               xi=3*i;
               sum0=b[xi];
               sum1=b[xi+1];
               sum2=b[xi+2];
               for (iptr=ptr[i]; iptr<ptr[i+1]; iptr++){
                  j=index[iptr];
                  if (j<istart || j>=iend){
                     xj=3*j;
                     aj=9*iptr;
                     X0=x[xj];
                     X1=x[xj+1];
                     X2=x[xj+2];
                     sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
                     sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
                     sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
                  }
               }
               b[xi]=sum0;
               b[xi+1]=sum1;
               b[xi+2]=sum2;
            }
         }
      }
   }
#endif

   /* step1: forward iteration
      x{k} = D{-1}(b - E*x{k} - F*x{k-1}) */
   /* One Gauss-Seidel iteration.
      In the case of the forward iteration x{k} = D{-1}(b - E*x{k} - F*x{k-1}),
      this turns into a loop (without coloring):
         for i in [0,n-1] do
            x_i = (1/a_ii) *
                  (b_i - \sum{j=0}{i-1}(a_ij*x_j) - \sum{j=i+1}{n-1}(a_ij*x_j))
      where the first "\sum" sums up newly updated values of x elements
      while the second "\sum" sums up previous (old) values of x elements.
      In the case of the backward iteration: x{k} = D{-1}(b - F*x{k} - E*x{k-1})
   */
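   /* note: with OpenMP each thread sweeps only its own chunk of rows;
      couplings to rows outside the chunk were moved into b by the
      pre-pass above, so the update is Gauss-Seidel within a chunk and
      Jacobi-like across chunks */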
   if (n_block == 1){
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
      for (t=0; t<nt; t++) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=istart; i<iend; i++){
#else
      for (i=0; i<n; i++) {
#endif
         sum0 = b[i];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               sum0 = sum0 - val[iptr] * x[j];
            }
         }
         x[i] = sum0*diag[i];
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   } else if (n_block == 2) {
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
      for (t=0; t<nt; t++) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=istart; i<iend; i++){
#else
      for (i=0; i<n; i++) {
#endif
         xi=2*i;
         ai=4*i;
         sum0 = b[xi];
         sum1 = b[xi+1];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               xj=2*j;
               aj=4*iptr;
               X0=x[xj];
               X1=x[xj+1];
               sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
               sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
            }
         }
         x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
         x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   } else if (n_block == 3) {
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
      for (t=0; t<nt; t++) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=istart; i<iend; i++){
#else
      for (i=0; i<n; i++) {
#endif
         xi=3*i;
         ai=9*i;
         sum0 = b[xi];
         sum1 = b[xi+1];
         sum2 = b[xi+2];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               xj=3*j;
               aj=9*iptr;
               X0=x[xj];
               X1=x[xj+1];
               X2=x[xj+2];
               sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
               sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
               sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
            }
         }
         x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
         x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
         x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   }

   /* step2: backward iteration
      x{k} = D{-1}(b - F*x{k} - E*x{k-1}) */
   if (n_block == 1){
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,sum0,iptr,j) schedule(static)
      for (t=nt-1; t>=0; t--) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=iend-1; i>=istart; i--){
#else
      for (i=n-1; i>=0; i--) {
#endif
         sum0 = b[i];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               sum0 = sum0 - val[iptr] * x[j];
            }
         }
         x[i] = sum0*diag[i];
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   } else if (n_block == 2) {
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,iptr,j,xj,aj,X0,X1) schedule(static)
      for (t=nt-1; t>=0; t--) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=iend-1; i>=istart; i--){
#else
      for (i=n-1; i>=0; i--) {
#endif
         xi=2*i;
         ai=4*i;
         sum0 = b[xi];
         sum1 = b[xi+1];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               xj=2*j;
               aj=4*iptr;
               X0=x[xj];
               X1=x[xj+1];
               sum0 = sum0 - val[aj  ]*X0 - val[aj+2]*X1;
               sum1 = sum1 - val[aj+1]*X0 - val[aj+3]*X1;
            }
         }
         x[xi  ]=diag[ai  ]*sum0 + diag[ai+2]*sum1;
         x[xi+1]=diag[ai+1]*sum0 + diag[ai+3]*sum1;
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   } else if (n_block == 3) {
#ifdef _OPENMP
      #pragma omp parallel for private(t,istart,iend,i,xi,ai,sum0,sum1,sum2,iptr,j,xj,aj,X0,X1,X2) schedule(static)
      for (t=nt-1; t>=0; t--) {
         istart=len*t+MIN(t,rest);
         iend=istart+len+(t<rest ? 1:0);
         for (i=iend-1; i>=istart; i--){
#else
      for (i=n-1; i>=0; i--) {
#endif
         xi=3*i;
         ai=9*i;
         sum0 = b[xi];
         sum1 = b[xi+1];
         sum2 = b[xi+2];
         for (iptr=ptr[i]; iptr<ptr[i+1]; ++iptr) {
            j=index[iptr];
#ifdef _OPENMP
            if (j >= istart && j < iend && i != j){
#else
            if (i != j) {
#endif
               xj=3*j;
               aj=9*iptr;
               X0=x[xj];
               X1=x[xj+1];
               X2=x[xj+2];
               sum0 = sum0 - val[aj  ]*X0 - val[aj+3]*X1 - val[aj+6]*X2;
               sum1 = sum1 - val[aj+1]*X0 - val[aj+4]*X1 - val[aj+7]*X2;
               sum2 = sum2 - val[aj+2]*X0 - val[aj+5]*X1 - val[aj+8]*X2;
            }
         }
         x[xi  ] = diag[ai  ]*sum0 + diag[ai+3]*sum1 + diag[ai+6]*sum2;
         x[xi+1] = diag[ai+1]*sum0 + diag[ai+4]*sum1 + diag[ai+7]*sum2;
         x[xi+2] = diag[ai+2]*sum0 + diag[ai+5]*sum1 + diag[ai+8]*sum2;
#ifdef _OPENMP
         }
      }
#else
      }
#endif
   }

   return;
}
