/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3043 - (show annotations)
Fri Jun 18 04:45:14 2010 UTC (8 years, 10 months ago) by artak
File MIME type: text/plain
File size: 25059 byte(s)
Problem with latest MKL should be fixed now. The problem was solving unrolled block matrix with MKL.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Solver.h"
27 #include "Options.h"
28 #include "PasoUtil.h"
29 #include "UMFPACK.h"
30 #include "MKL.h"
31 #include "SystemMatrix.h"
32 #include "Pattern_coupling.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Solver_AMG_System_free(Paso_Solver_AMG_System * in) {
39 dim_t i;
40 if (in!=NULL) {
41 for (i=0;i<in->block_size;++i) {
42 Paso_Solver_AMG_free(in->amgblock[i]);
43 Paso_SparseMatrix_free(in->block[i]);
44 }
45 MEMFREE(in);
46 }
47 }
48
49
50 /* free all memory used by AMG */
51
52 void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
53 if (in!=NULL) {
54
55 if(in->Smoother->ID==PASO_JACOBI)
56 Paso_Solver_Jacobi_free(in->Smoother->Jacobi);
57 else if (in->Smoother->ID==PASO_GS)
58 Paso_Solver_GS_free(in->Smoother->GS);
59 MEMFREE(in->Smoother);
60
61 Paso_SparseMatrix_free(in->A_FC);
62 Paso_SparseMatrix_free(in->A_FF);
63 Paso_SparseMatrix_free(in->W_FC);
64 Paso_SparseMatrix_free(in->A_CF);
65 Paso_SparseMatrix_free(in->P);
66 Paso_SparseMatrix_free(in->R);
67 Paso_SparseMatrix_free(in->A);
68 if(in->coarsest_level==TRUE) {
69 #ifdef MKL
70 Paso_MKL_free1(in->AOffset1);
71 Paso_SparseMatrix_free(in->AOffset1);
72 Paso_SparseMatrix_free(in->AUnrolled);
73 #else
74 #ifdef UMFPACK
75 Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
76 Paso_SparseMatrix_free(in->AUnrolled);
77 #endif
78 #endif
79 }
80 MEMFREE(in->rows_in_F);
81 MEMFREE(in->rows_in_C);
82 MEMFREE(in->mask_F);
83 MEMFREE(in->mask_C);
84 MEMFREE(in->x_F);
85 MEMFREE(in->b_F);
86 MEMFREE(in->x_C);
87 MEMFREE(in->b_C);
88 in->solver=NULL;
89 Paso_Solver_AMG_free(in->AMG_of_Coarse);
90 MEMFREE(in->b_C);
91 MEMFREE(in);
92 }
93 }
94
95 /**************************************************************/
96
97 /* constructs the block-block factorization of
98
99 [ A_FF A_FC ]
100 A_p=
101 [ A_CF A_FF ]
102
103 to
104
105 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
106 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
107
108
109 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
110
111 then AMG is applied to S again until S becomes empty
112
113 */
114 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
115 Paso_Solver_AMG* out=NULL;
116 /*
117 Paso_Pattern* temp1=NULL;
118 Paso_Pattern* temp2=NULL;
119 */
120 bool_t verbose=options->verbose;
121 bool_t timing=0;
122
123 dim_t n=A_p->numRows;
124 dim_t n_block=A_p->row_block_size;
125 index_t* mis_marker=NULL;
126 index_t* counter=NULL;
127 /*index_t iPtr,*index, *where_p;*/
128 dim_t i,j;
129 Paso_SparseMatrix * A_c=NULL;
130 double time0=0;
131 Paso_SparseMatrix * Atemp=NULL;
132 double sparsity=0;
133
134 /*
135 double *temp,*temp_1;
136 double S;
137 index_t iptr;
138 */
139
140 /*char filename[8];*/
141
142 /*
143 sprintf(filename,"AMGLevel%d",level);
144
145 Paso_SparseMatrix_saveMM(A_p,filename);
146 */
147
148 /*Make sure we have block sizes 1*/
149 /*if (A_p->col_block_size>1) {
150 Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires column block size 1.");
151 return NULL;
152 }
153 if (A_p->row_block_size>1) {
154 Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires row block size 1.");
155 return NULL;
156 }*/
157 out=MEMALLOC(1,Paso_Solver_AMG);
158 out->Smoother=MEMALLOC(1,Paso_Solver_Smoother);
159 /* identify independend set of rows/columns */
160 mis_marker=TMPMEMALLOC(n,index_t);
161 counter=TMPMEMALLOC(n,index_t);
162 if ( !( Paso_checkPtr(mis_marker) || Paso_checkPtr(counter) || Paso_checkPtr(out)) ) {
163 out->AMG_of_Coarse=NULL;
164 out->A_FF=NULL;
165 out->A_FC=NULL;
166 out->A_CF=NULL;
167 out->W_FC=NULL;
168 out->P=NULL;
169 out->R=NULL;
170 out->rows_in_F=NULL;
171 out->rows_in_C=NULL;
172 out->mask_F=NULL;
173 out->mask_C=NULL;
174 out->x_F=NULL;
175 out->b_F=NULL;
176 out->x_C=NULL;
177 out->b_C=NULL;
178 out->A=Paso_SparseMatrix_getReference(A_p);
179 out->AUnrolled=NULL;
180 out->AOffset1=NULL;
181 out->solver=NULL;
182 out->Smoother->ID=options->smoother;
183 out->Smoother->Jacobi=NULL;
184 out->Smoother->GS=NULL;
185 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
186 out->level=level;
187 out->n=n;
188 out->n_F=n+1;
189 out->n_block=n_block;
190 out->post_sweeps=options->post_sweeps;
191 out->pre_sweeps=options->pre_sweeps;
192
193 sparsity=(A_p->len*1.)/(1.*A_p->numRows*A_p->numCols);
194
195 if (verbose) fprintf(stdout,"Stats: Sparsity of the Coarse Matrix with %d non-zeros (%d,%d) in level %d is %.6f\n",A_p->len,A_p->numRows,A_p->numCols,level,sparsity);
196
197
198 if(sparsity>0.05) {
199 level=0;
200 }
201
202
203 if (level==0 || n<=options->min_coarse_matrix_size) {
204 out->coarsest_level=TRUE;
205 /*out->GS=Paso_Solver_getJacobi(A_p);*/
206
207 #ifdef MKL
208 out->AUnrolled=Paso_SparseMatrix_unroll(A_p);
209 out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->AUnrolled->pattern,1,1, FALSE);
210 #pragma omp parallel for private(i) schedule(static)
211 for (i=0;i<out->A->len;++i) {
212 out->AOffset1->val[i]=out->AUnrolled->val[i];
213 }
214 #else
215 #ifdef UMFPACK
216 out->AUnrolled=Paso_SparseMatrix_unroll(A_p);
217 /*Paso_SparseMatrix_saveMM(out->AUnrolled,"AUnroll.mat");
218 Paso_SparseMatrix_saveMM(A_p,"Aorg.mat");
219 */
220 #else
221 if (options->smoother == PASO_JACOBI)
222 out->Smoother->Jacobi=Paso_Solver_getJacobi(A_p);
223 else if (options->smoother == PASO_GS)
224 out->Smoother->GS=Paso_Solver_getGS(A_p,verbose);
225 #endif
226 #endif
227
228 } else {
229 out->coarsest_level=FALSE;
230
231 if (options->smoother == PASO_JACOBI)
232 out->Smoother->Jacobi=Paso_Solver_getJacobi(A_p);
233 else if (options->smoother == PASO_GS)
234 out->Smoother->GS=Paso_Solver_getGS(A_p,verbose);
235
236 /* identify independend set of rows/columns */
237 #pragma omp parallel for private(i) schedule(static)
238 for (i=0;i<n;++i) mis_marker[i]=-1;
239
240 /*mesuring coarsening time */
241 time0=Paso_timer();
242
243 if (options->coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) {
244 Paso_Pattern_YS(A_p,mis_marker,options->coarsening_threshold);
245 }
246 else if (options->coarsening_method == PASO_RUGE_STUEBEN_COARSENING) {
247 Paso_Pattern_RS(A_p,mis_marker,options->coarsening_threshold);
248 }
249 else if (options->coarsening_method == PASO_AGGREGATION_COARSENING) {
250 Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);
251 }
252 else if (options->coarsening_method == PASO_STANDARD_COARSENING) {
253 Paso_Pattern_Standard_Block(A_p,mis_marker,options->coarsening_threshold);
254 }
255 else {
256 /*Default coarseneing*/
257 Paso_Pattern_Standard_Block(A_p,mis_marker,options->coarsening_threshold);
258 /*Paso_Pattern_Read("Standard.spl",n,mis_marker);*/
259 /*Paso_Pattern_YS(A_p,mis_marker,options->coarsening_threshold);*/
260 /*Paso_Pattern_greedy_Agg(A_p,mis_marker,options->coarsening_threshold);*/
261 /*Paso_Pattern_greedy(A_p->pattern,mis_marker);*/
262 /*Paso_Pattern_Aggregiation(A_p,mis_marker,options->coarsening_threshold);*/
263
264 }
265
266 if (timing) fprintf(stdout,"timing: Profilining for level %d:\n",level);
267
268 time0=Paso_timer()-time0;
269 if (timing) fprintf(stdout,"timing: Coarsening: %e\n",time0);
270
271 #pragma omp parallel for private(i) schedule(static)
272 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
273
274 out->n_F=Paso_Util_cumsum(n,counter);
275
276 if (out->n_F==0) {
277 out->coarsest_level=TRUE;
278 level=0;
279 if (verbose) {
280 /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
281 printf("AMG coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
282 }
283 }
284 else if (out->n_F==n) {
285 out->coarsest_level=TRUE;
286 level=0;
287 if (verbose) {
288 /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
289 printf("AMG coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
290
291 }
292 } else {
293
294 if (Paso_noError()) {
295
296 /*#pragma omp parallel for private(i) schedule(static)
297 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
298 out->n_F=Paso_Util_cumsum(n,counter);
299 */
300
301 out->mask_F=MEMALLOC(n,index_t);
302 out->rows_in_F=MEMALLOC(out->n_F,index_t);
303 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->rows_in_F) ) ) {
304 /* creates an index for F from mask */
305 #pragma omp parallel for private(i) schedule(static)
306 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
307 #pragma omp parallel for private(i) schedule(static)
308 for (i = 0; i < n; ++i) {
309 if (mis_marker[i]) {
310 out->rows_in_F[counter[i]]=i;
311 out->mask_F[i]=counter[i];
312 } else {
313 out->mask_F[i]=-1;
314 }
315 }
316
317 }
318 }
319
320 /* if(level==1) {
321 printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
322 for (i = 0; i < n; ++i) {
323 printf("##%d %d\n",i,!mis_marker[i]);
324 }
325 }
326 */
327
328 /*check whether coarsening process actually makes sense to continue.
329 if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
330 if ((out->n_F*100/n)<30) {
331 level=1;
332 }
333
334 if ( Paso_noError()) {
335 out->n_C=n-out->n_F;
336 out->rows_in_C=MEMALLOC(out->n_C,index_t);
337 out->mask_C=MEMALLOC(n,index_t);
338 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
339 /* creates an index for C from mask */
340 #pragma omp parallel for private(i) schedule(static)
341 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
342 Paso_Util_cumsum(n,counter);
343 #pragma omp parallel for private(i) schedule(static)
344 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
345 #pragma omp parallel for private(i) schedule(static)
346 for (i = 0; i < n; ++i) {
347 if (! mis_marker[i]) {
348 out->rows_in_C[counter[i]]=i;
349 out->mask_C[i]=counter[i];
350 } else {
351 out->mask_C[i]=-1;
352 }
353 }
354 }
355 }
356 if ( Paso_noError()) {
357 /* get A_FF block: */
358 /*
359 out->A_FF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_F,out->rows_in_F,out->mask_F);
360 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
361 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
362 */
363
364 /*Compute W_FC*/
365 /*initialy W_FC=A_FC*/
366 out->W_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
367
368 /*sprintf(filename,"W_FCbefore_%d",level);
369 Paso_SparseMatrix_saveMM(out->W_FC,filename);
370 */
371 /* for (i = 0; i < n; ++i) {
372 printf("##mis_marker[%d]=%d\n",i,mis_marker[i]);
373 }
374 */
375 time0=Paso_timer();
376 Paso_SparseMatrix_updateWeights(A_p,out->W_FC,mis_marker);
377 time0=Paso_timer()-time0;
378 if (timing) fprintf(stdout,"timing: updateWeights: %e\n",time0);
379
380 /*
381 sprintf(filename,"W_FCafter_%d",level);
382 Paso_SparseMatrix_saveMM(out->W_FC,filename);
383 */
384
385 /* get Prolongation and Restriction */
386 time0=Paso_timer();
387 out->P=Paso_SparseMatrix_getProlongation(out->W_FC,mis_marker);
388 time0=Paso_timer()-time0;
389 if (timing) fprintf(stdout,"timing: getProlongation: %e\n",time0);
390 /*out->P=Paso_SparseMatrix_loadMM_toCSR("P1.mtx");*/
391
392 /*
393 sprintf(filename,"P_%d",level);
394 Paso_SparseMatrix_saveMM(out->P,filename);
395 */
396
397 time0=Paso_timer();
398 out->R=Paso_SparseMatrix_getRestriction(out->P);
399 time0=Paso_timer()-time0;
400 if (timing) fprintf(stdout,"timing: getRestriction: %e\n",time0);
401 /*out->R=Paso_SparseMatrix_loadMM_toCSR("R1.mtx");*/
402
403 /*
404 sprintf(filename,"R_%d",level);
405 Paso_SparseMatrix_saveMM(out->R,filename);
406 */
407
408 }
409 if ( Paso_noError()) {
410
411 time0=Paso_timer();
412
413 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
414
415 A_c=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
416
417 /*A_c=Paso_SparseMatrix_loadMM_toCSR("A_C1.mtx");*/
418
419 Paso_SparseMatrix_free(Atemp);
420
421 /*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/
422 time0=Paso_timer()-time0;
423 if (timing) fprintf(stdout,"timing: getCoarseMatrix: %e\n",time0);
424
425
426 /*Paso_Solver_getCoarseMatrix(A_c, A_p,out->R,out->P);*/
427 /*
428 sprintf(filename,"A_C_%d",level);
429 Paso_SparseMatrix_saveMM(A_c,filename);
430 */
431
432 out->AMG_of_Coarse=Paso_Solver_getAMG(A_c,level-1,options);
433 }
434
435 /* allocate work arrays for AMG application */
436 if (Paso_noError()) {
437 /*
438 out->x_F=MEMALLOC(n_block*out->n_F,double);
439 out->b_F=MEMALLOC(n_block*out->n_F,double);
440 */
441 out->x_C=MEMALLOC(n_block*out->n_C,double);
442 out->b_C=MEMALLOC(n_block*out->n_C,double);
443
444 /*if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {*/
445 if ( ! ( Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
446
447 /*
448 #pragma omp parallel for private(i) schedule(static)
449 for (i = 0; i < out->n_F; ++i) {
450 out->x_F[i]=0.;
451 out->b_F[i]=0.;
452 }
453 */
454
455 #pragma omp parallel for private(i,j) schedule(static)
456 for (i = 0; i < out->n_C; ++i) {
457 for(j=0;j<n_block;++j) {
458 out->x_C[i*n_block+j]=0.;
459 out->b_C[i*n_block+j]=0.;
460 }
461 }
462 }
463 }
464 Paso_SparseMatrix_free(A_c);
465 }
466 }
467 }
468 TMPMEMFREE(mis_marker);
469 TMPMEMFREE(counter);
470
471 if (Paso_noError()) {
472 if (verbose && level>0 && !out->coarsest_level) {
473 printf("AMG: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
474 }
475 return out;
476 } else {
477 Paso_Solver_AMG_free(out);
478 return NULL;
479 }
480 }
481
482 /**************************************************************/
483
484 /* apply AMG precondition b-> x
485
486 in fact it solves
487
488 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
489 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
490
491 in the form
492
493 b->[b_F,b_C]
494 x_F=invA_FF*b_F
495 b_C=b_C-A_CF*x_F
496 x_C=AMG(b_C)
497 b_F=b_F-A_FC*x_C
498 x_F=invA_FF*b_F
499 x<-[x_F,x_C]
500
501 should be called within a parallel region
502 barrier synconization should be performed to make sure that the input vector available
503
504 */
505
506 void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
507 dim_t i,j;
508 double time0=0;
509 double *r=NULL, *x0=NULL;
510 bool_t timing=0;
511
512 dim_t post_sweeps=amg->post_sweeps;
513 dim_t pre_sweeps=amg->pre_sweeps;
514
515 #ifdef UMFPACK
516 Paso_UMFPACK_Handler * ptr=NULL;
517 #endif
518
519 r=MEMALLOC(amg->n*amg->n_block,double);
520 x0=MEMALLOC(amg->n*amg->n_block,double);
521
522 if (amg->coarsest_level) {
523
524 time0=Paso_timer();
525 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
526
527 if (amg->n_F==0 || amg->n_F==amg->n) {
528 if(amg->Smoother->ID==PASO_JACOBI)
529 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x,b);
530 else if (amg->Smoother->ID==PASO_GS)
531 Paso_Solver_solveGS(amg->Smoother->GS,x,b);
532 }
533 else {
534 #ifdef MKL
535 Paso_MKL1(amg->AOffset1,x,b,timing);
536 #else
537 #ifdef UMFPACK
538 ptr=(Paso_UMFPACK_Handler *)(amg->solver);
539 Paso_UMFPACK1(&ptr,amg->AUnrolled,x,b,timing);
540 amg->solver=(void*) ptr;
541 #else
542 if(amg->Smoother->ID==PASO_JACOBI)
543 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x,b);
544 else if (amg->Smoother->ID==PASO_GS)
545 Paso_Solver_solveGS(amg->Smoother->GS,x,b);
546 #endif
547 #endif
548 }
549
550 time0=Paso_timer()-time0;
551 if (timing) fprintf(stdout,"timing: DIRECT SOLVER: %e\n",time0);
552
553 } else {
554 /* presmoothing */
555 time0=Paso_timer();
556 if(amg->Smoother->ID==PASO_JACOBI)
557 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x,b);
558 else if (amg->Smoother->ID==PASO_GS)
559 Paso_Solver_solveGS(amg->Smoother->GS,x,b);
560
561 /***********/
562 if (pre_sweeps>1) {
563 #pragma omp parallel for private(i,j) schedule(static)
564 for (i=0;i<amg->n;++i) {
565 for (j=0;j<amg->n_block;++j) {
566 r[i*amg->n_block+j]=b[i*amg->n_block+j];
567 }
568 }
569 }
570
571 while(pre_sweeps>1) {
572 #pragma omp parallel for private(i,j) schedule(static)
573 for (i=0;i<amg->n;++i) {
574 for (j=0;j<amg->n_block;++j) {
575 r[i*amg->n_block+j]+=b[i*amg->n_block+j];
576 }
577 }
578
579
580 /* Compute the residual r=r-Ax*/
581 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
582 /* Go round again*/
583
584 if(amg->Smoother->ID==PASO_JACOBI)
585 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x,r);
586 else if (amg->Smoother->ID==PASO_GS)
587 Paso_Solver_solveGS(amg->Smoother->GS,x,r);
588
589 pre_sweeps-=1;
590 }
591 /***********/
592
593 time0=Paso_timer()-time0;
594 if (timing) fprintf(stdout,"timing: Presmooting: %e\n",time0);
595 /* end of presmoothing */
596
597 time0=Paso_timer();
598 #pragma omp parallel for private(i,j) schedule(static)
599 for (i=0;i<amg->n;++i) {
600 for (j=0;j<amg->n_block;++j) {
601 r[i*amg->n_block+j]=b[i*amg->n_block+j];
602 }
603 }
604
605 /*r=b-Ax*/
606 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
607
608 /* b_c = R*r */
609 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,r,0.,amg->b_C);
610
611 time0=Paso_timer()-time0;
612 if (timing) fprintf(stdout,"timing: Before next level: %e\n",time0);
613
614 /* x_C=AMG(b_C) */
615 Paso_Solver_solveAMG(amg->AMG_of_Coarse,amg->x_C,amg->b_C);
616
617 time0=Paso_timer();
618
619 /* x_0 = P*x_c */
620 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,0.,x0);
621
622 /* x=x+x0 */
623 #pragma omp parallel for private(i,j) schedule(static)
624 for (i=0;i<amg->n;++i) {
625 for (j=0;j<amg->n_block;++j) {
626 x[i*amg->n_block+j]+=x0[i*amg->n_block+j];
627 }
628 }
629
630 /*postsmoothing*/
631
632 time0=Paso_timer();
633 #pragma omp parallel for private(i,j) schedule(static)
634 for (i=0;i<amg->n;++i) {
635 for (j=0;j<amg->n_block;++j) {
636 r[i*amg->n_block+j]=b[i*amg->n_block+j];
637 }
638 }
639
640 /*r=b-Ax */
641 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
642 if(amg->Smoother->ID==PASO_JACOBI)
643 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x0,r);
644 else if (amg->Smoother->ID==PASO_GS)
645 Paso_Solver_solveGS(amg->Smoother->GS,x0,r);
646
647 #pragma omp parallel for private(i,j) schedule(static)
648 for (i=0;i<amg->n;++i) {
649 for (j=0;j<amg->n_block;++j) {
650 x[i*amg->n_block+j]+=x0[i*amg->n_block+j];
651 }
652 }
653
654 /***************/
655 while(post_sweeps>1) {
656
657 #pragma omp parallel for private(i,j) schedule(static)
658 for (i=0;i<amg->n;++i) {
659 for (j=0;j<amg->n_block;++j) {
660 r[i*amg->n_block+j]=b[i*amg->n_block+j];
661 }
662 }
663
664 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
665
666 if(amg->Smoother->ID==PASO_JACOBI)
667 Paso_Solver_solveJacobi(amg->Smoother->Jacobi,x0,r);
668 else if (amg->Smoother->ID==PASO_GS)
669 Paso_Solver_solveGS(amg->Smoother->GS,x0,r);
670
671 #pragma omp parallel for private(i,j) schedule(static)
672 for (i=0;i<amg->n;++i) {
673 for (j=0;j<amg->n_block;++j) {
674 x[i*amg->n_block+j]+=x0[i*amg->n_block+j];
675 }
676 }
677 post_sweeps-=1;
678 }
679 /**************/
680
681 time0=Paso_timer()-time0;
682 if (timing) fprintf(stdout,"timing: Postsmoothing: %e\n",time0);
683
684 /*end of postsmoothing*/
685
686 }
687 MEMFREE(r);
688 MEMFREE(x0);
689
690 return;
691 }

  ViewVC Help
Powered by ViewVC 1.1.26