/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3193 - (show annotations)
Tue Sep 21 06:56:44 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 19866 byte(s)
more clean up in AMG
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Preconditioner.h"
27 #include "Options.h"
28 #include "PasoUtil.h"
29 #include "UMFPACK.h"
30 #include "MKL.h"
31 #include "Coarsening.h"
32
33 /**************************************************************/
34
35 /* free all memory used by AMG */
36
37 void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
38 if (in!=NULL) {
39 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
40
41 /*=========================*/
42 Paso_SparseMatrix_free(in->A_FC);
43 Paso_SparseMatrix_free(in->A_FF);
44 Paso_SparseMatrix_free(in->W_FC);
45 Paso_SparseMatrix_free(in->A_CF);
46 Paso_SparseMatrix_free(in->P);
47 Paso_SparseMatrix_free(in->R);
48 Paso_SparseMatrix_free(in->A);
49 if(in->coarsest_level==TRUE) {
50 #ifdef MKL
51 Paso_MKL_free1(in->AOffset1);
52 Paso_SparseMatrix_free(in->AOffset1);
53 Paso_SparseMatrix_free(in->AUnrolled);
54 #else
55 #ifdef UMFPACK
56 Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
57 Paso_SparseMatrix_free(in->AUnrolled);
58 #endif
59 #endif
60 }
61 MEMFREE(in->rows_in_F);
62 MEMFREE(in->rows_in_C);
63 MEMFREE(in->mask_F);
64 MEMFREE(in->mask_C);
65 MEMFREE(in->x_F);
66 MEMFREE(in->b_F);
67 MEMFREE(in->x_C);
68 MEMFREE(in->b_C);
69 in->solver=NULL;
70 Paso_Preconditioner_LocalAMG_free(in->AMG_of_Coarse);
71 MEMFREE(in->b_C);
72 MEMFREE(in);
73 }
74 }
75
76 /**************************************************************/
77
78 /* constructs the block-block factorization of
79
80 [ A_FF A_FC ]
81 A_p=
82 [ A_CF A_FF ]
83
84 to
85
86 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
87 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
88
89
90 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
91
92 then AMG is applied to S again until S becomes empty
93
94 */
95 Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
96 Paso_Preconditioner_LocalAMG* out=NULL;
97 /*
98 Paso_Pattern* temp1=NULL;
99 Paso_Pattern* temp2=NULL;
100 */
101 bool_t verbose=options->verbose;
102 bool_t timing=0;
103
104 const dim_t n=A_p->numRows;
105 const dim_t n_block=A_p->row_block_size;
106
107
108 index_t* mis_marker=NULL;
109 index_t* counter=NULL;
110 /*index_t iPtr,*index, *where_p;*/
111 dim_t i,j;
112 Paso_SparseMatrix * A_c=NULL;
113 double time0=0;
114 Paso_SparseMatrix * Atemp=NULL;
115 double sparsity=0;
116
117 /*
118 double *temp,*temp_1;
119 double S;
120 index_t iptr;
121 */
122
123 /*char filename[8];*/
124
125 /*
126 sprintf(filename,"AMGLevel%d",level);
127
128 Paso_SparseMatrix_saveMM(A_p,filename);
129 */
130
131 /*Make sure we have block sizes 1*/
132 /*if (A_p->col_block_size>1) {
133 Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires column block size 1.");
134 return NULL;
135 }
136 if (A_p->row_block_size>1) {
137 Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires row block size 1.");
138 return NULL;
139 }*/
140 out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
141
142
143 /* identify independend set of rows/columns */
144 mis_marker=TMPMEMALLOC(n,index_t);
145 counter=TMPMEMALLOC(n,index_t);
146 if ( !( Paso_checkPtr(mis_marker) || Paso_checkPtr(counter) || Paso_checkPtr(out)) ) {
147
148
149 out->post_sweeps=options->post_sweeps;
150 out->pre_sweeps=options->pre_sweeps;
151
152 out->AMG_of_Coarse=NULL;
153 out->A_FF=NULL;
154 out->A_FC=NULL;
155 out->A_CF=NULL;
156 out->W_FC=NULL;
157 out->P=NULL;
158 out->R=NULL;
159 out->rows_in_F=NULL;
160 out->rows_in_C=NULL;
161 out->mask_F=NULL;
162 out->mask_C=NULL;
163 out->x_F=NULL;
164 out->b_F=NULL;
165 out->x_C=NULL;
166 out->b_C=NULL;
167 out->A=Paso_SparseMatrix_getReference(A_p);
168 out->AUnrolled=NULL;
169 out->AOffset1=NULL;
170 out->solver=NULL;
171 out->Smoother=NULL;
172 out->level=level;
173 out->n=n;
174 out->n_F=n+1;
175 out->n_block=n_block;
176
177
178 sparsity=(A_p->len*1.)/(1.*A_p->numRows*A_p->numCols);
179
180 if (verbose) fprintf(stdout,"Stats: Sparsity of the Coarse Matrix with %d non-zeros (%d,%d) in level %d is %.6f\n",A_p->len,A_p->numRows,A_p->numCols,level,sparsity);
181
182
183 if(sparsity>0.05) {
184 level=0;
185 }
186
187
188 if (level==0 || n<=options->min_coarse_matrix_size) {
189 out->coarsest_level=TRUE;
190 #ifdef MKL
191 out->AUnrolled=Paso_SparseMatrix_unroll(A_p);
192 out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->AUnrolled->pattern,1,1, FALSE);
193 #pragma omp parallel for private(i) schedule(static)
194 for (i=0;i<out->A->len;++i) {
195 out->AOffset1->val[i]=out->AUnrolled->val[i];
196 }
197 #else
198 #ifdef UMFPACK
199 out->AUnrolled=Paso_SparseMatrix_unroll(A_p);
200 /*Paso_SparseMatrix_saveMM(out->AUnrolled,"AUnroll.mat");
201 Paso_SparseMatrix_saveMM(A_p,"Aorg.mat");
202 */
203 #else
204 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
205 #endif
206 #endif
207
208 } else {
209 out->coarsest_level=FALSE;
210 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
211
212 /* Start Coarsening : */
213 time0=Paso_timer();
214 Paso_Coarsening_Local(mis_marker, A_p, options->coarsening_threshold, options->coarsening_method);
215 time0=Paso_timer()-time0;
216 if (timing) fprintf(stdout,"Level %d: timing: Coarsening: %e\n",level,time0);
217
218 #pragma omp parallel for private(i) schedule(static)
219 for (i = 0; i < n; ++i) {
220 mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
221 counter[i]=mis_marker[i];
222 }
223 out->n_F=Paso_Util_cumsum(n,counter);
224
225 if (out->n_F==0) {
226 out->coarsest_level=TRUE;
227 level=0;
228 if (verbose) {
229 /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
230 printf("AMG coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
231 }
232 }
233 else if (out->n_F==n) {
234 out->coarsest_level=TRUE;
235 level=0;
236 if (verbose) {
237 /*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
238 printf("AMG coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
239
240 }
241 } else {
242
243 if (Paso_noError()) {
244
245 /*#pragma omp parallel for private(i) schedule(static)
246 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
247 out->n_F=Paso_Util_cumsum(n,counter);
248 */
249
250 out->mask_F=MEMALLOC(n,index_t);
251 out->rows_in_F=MEMALLOC(out->n_F,index_t);
252 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->rows_in_F) ) ) {
253 /* creates an index for F from mask */
254 #pragma omp parallel for private(i) schedule(static)
255 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
256 #pragma omp parallel for private(i) schedule(static)
257 for (i = 0; i < n; ++i) {
258 if (mis_marker[i]) {
259 out->rows_in_F[counter[i]]=i;
260 out->mask_F[i]=counter[i];
261 } else {
262 out->mask_F[i]=-1;
263 }
264 }
265
266 }
267 }
268
269 /* if(level==1) {
270 printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
271 for (i = 0; i < n; ++i) {
272 printf("##%d %d\n",i,!mis_marker[i]);
273 }
274 }
275 */
276
277 /*check whether coarsening process actually makes sense to continue.
278 if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
279 if ((out->n_F*100/n)<30) {
280 level=1;
281 }
282
283 if ( Paso_noError()) {
284 out->n_C=n-out->n_F;
285 out->rows_in_C=MEMALLOC(out->n_C,index_t);
286 out->mask_C=MEMALLOC(n,index_t);
287 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
288 /* creates an index for C from mask */
289 #pragma omp parallel for private(i) schedule(static)
290 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
291 Paso_Util_cumsum(n,counter);
292 #pragma omp parallel for private(i) schedule(static)
293 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
294 #pragma omp parallel for private(i) schedule(static)
295 for (i = 0; i < n; ++i) {
296 if (! mis_marker[i]) {
297 out->rows_in_C[counter[i]]=i;
298 out->mask_C[i]=counter[i];
299 } else {
300 out->mask_C[i]=-1;
301 }
302 }
303 }
304 }
305 if ( Paso_noError()) {
306 /* get A_FF block: */
307 /*
308 out->A_FF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_F,out->rows_in_F,out->mask_F);
309 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
310 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
311 */
312
313 /*Compute W_FC*/
314 /*initialy W_FC=A_FC*/
315 out->W_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
316
317 /*sprintf(filename,"W_FCbefore_%d",level);
318 Paso_SparseMatrix_saveMM(out->W_FC,filename);
319 */
320 /* for (i = 0; i < n; ++i) {
321 printf("##mis_marker[%d]=%d\n",i,mis_marker[i]);
322 }
323 */
324 time0=Paso_timer();
325 Paso_SparseMatrix_updateWeights(A_p,out->W_FC,mis_marker);
326 time0=Paso_timer()-time0;
327 if (timing) fprintf(stdout,"timing: updateWeights: %e\n",time0);
328
329 /*
330 sprintf(filename,"W_FCafter_%d",level);
331 Paso_SparseMatrix_saveMM(out->W_FC,filename);
332 */
333
334 /* get Prolongation and Restriction */
335 time0=Paso_timer();
336 out->P=Paso_SparseMatrix_getProlongation(out->W_FC,mis_marker);
337 time0=Paso_timer()-time0;
338 if (timing) fprintf(stdout,"timing: getProlongation: %e\n",time0);
339 /*out->P=Paso_SparseMatrix_loadMM_toCSR("P1.mtx");*/
340
341 /*
342 sprintf(filename,"P_%d",level);
343 Paso_SparseMatrix_saveMM(out->P,filename);
344 */
345
346 time0=Paso_timer();
347 out->R=Paso_SparseMatrix_getRestriction(out->P);
348 time0=Paso_timer()-time0;
349 if (timing) fprintf(stdout,"timing: getRestriction: %e\n",time0);
350 /*out->R=Paso_SparseMatrix_loadMM_toCSR("R1.mtx");*/
351
352 /*
353 sprintf(filename,"R_%d",level);
354 Paso_SparseMatrix_saveMM(out->R,filename);
355 */
356
357 }
358 if ( Paso_noError()) {
359
360 time0=Paso_timer();
361
362 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
363
364 A_c=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
365
366 /*A_c=Paso_SparseMatrix_loadMM_toCSR("A_C1.mtx");*/
367
368 Paso_SparseMatrix_free(Atemp);
369
370 /*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/
371 time0=Paso_timer()-time0;
372 if (timing) fprintf(stdout,"timing: getCoarseMatrix: %e\n",time0);
373
374
375 /*Paso_Solver_getCoarseMatrix(A_c, A_p,out->R,out->P);*/
376 /*
377 sprintf(filename,"A_C_%d",level);
378 Paso_SparseMatrix_saveMM(A_c,filename);
379 */
380
381 out->AMG_of_Coarse=Paso_Preconditioner_LocalAMG_alloc(A_c,level-1,options);
382 }
383
384 /* allocate work arrays for AMG application */
385 if (Paso_noError()) {
386 /*
387 out->x_F=MEMALLOC(n_block*out->n_F,double);
388 out->b_F=MEMALLOC(n_block*out->n_F,double);
389 */
390 out->x_C=MEMALLOC(n_block*out->n_C,double);
391 out->b_C=MEMALLOC(n_block*out->n_C,double);
392
393 /*if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {*/
394 if ( ! ( Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
395
396 /*
397 #pragma omp parallel for private(i) schedule(static)
398 for (i = 0; i < out->n_F; ++i) {
399 out->x_F[i]=0.;
400 out->b_F[i]=0.;
401 }
402 */
403
404 #pragma omp parallel for private(i,j) schedule(static)
405 for (i = 0; i < out->n_C; ++i) {
406 for(j=0;j<n_block;++j) {
407 out->x_C[i*n_block+j]=0.;
408 out->b_C[i*n_block+j]=0.;
409 }
410 }
411 }
412 }
413 Paso_SparseMatrix_free(A_c);
414 }
415 }
416 }
417 TMPMEMFREE(mis_marker);
418 TMPMEMFREE(counter);
419
420 if (Paso_noError()) {
421 if (verbose && level>0 && !out->coarsest_level) {
422 printf("AMG: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
423 }
424 return out;
425 } else {
426 Paso_Preconditioner_LocalAMG_free(out);
427 return NULL;
428 }
429 }
430
431 /**************************************************************/
432
433 /* apply AMG precondition b-> x
434
435 in fact it solves
436
437 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
438 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
439
440 in the form
441
442 b->[b_F,b_C]
443 x_F=invA_FF*b_F
444 b_C=b_C-A_CF*x_F
445 x_C=AMG(b_C)
446 b_F=b_F-A_FC*x_C
447 x_F=invA_FF*b_F
448 x<-[x_F,x_C]
449
450 should be called within a parallel region
451 barrier synconization should be performed to make sure that the input vector available
452
453 */
454
455 void Paso_Preconditioner_LocalAMG_solve(Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
456 dim_t i,j;
457 double time0=0;
458 double *r=NULL, *x0=NULL;
459 bool_t timing=0;
460
461 dim_t post_sweeps=amg->post_sweeps;
462 dim_t pre_sweeps=amg->pre_sweeps;
463
464 #ifdef UMFPACK
465 Paso_UMFPACK_Handler * ptr=NULL;
466 #endif
467
468 r=MEMALLOC(amg->n*amg->n_block,double);
469 x0=MEMALLOC(amg->n*amg->n_block,double);
470
471 if (amg->coarsest_level) {
472
473 time0=Paso_timer();
474 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
475
476 if (amg->n_F==0 || amg->n_F==amg->n) {
477 Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother,x,b,1, FALSE);
478 }
479 else {
480 #ifdef MKL
481 Paso_MKL1(amg->AOffset1,x,b,timing);
482 #else
483 #ifdef UMFPACK
484 ptr=(Paso_UMFPACK_Handler *)(amg->solver);
485 Paso_UMFPACK1(&ptr,amg->AUnrolled,x,b,timing);
486 amg->solver=(void*) ptr;
487 #else
488 Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother,x,b,1, FALSE);
489 #endif
490 #endif
491 }
492
493 time0=Paso_timer()-time0;
494 if (timing) fprintf(stdout,"timing: DIRECT SOLVER: %e\n",time0);
495
496 } else {
497 /* presmoothing */
498 time0=Paso_timer();
499 Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother, x, b, pre_sweeps, FALSE);
500 time0=Paso_timer()-time0;
501 if (timing) fprintf(stdout,"timing: Presmooting: %e\n",time0);
502
503 /* end of presmoothing */
504
505 time0=Paso_timer();
506 #pragma omp parallel for private(i,j) schedule(static)
507 for (i=0;i<amg->n;++i) {
508 for (j=0;j<amg->n_block;++j) {
509 r[i*amg->n_block+j]=b[i*amg->n_block+j];
510 }
511 }
512
513 /*r=b-Ax*/
514 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
515
516 /* b_c = R*r */
517 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,r,0.,amg->b_C);
518
519 time0=Paso_timer()-time0;
520 if (timing) fprintf(stdout,"timing: Before next level: %e\n",time0);
521
522 /* x_C=AMG(b_C) */
523 Paso_Preconditioner_LocalAMG_solve(amg->AMG_of_Coarse,amg->x_C,amg->b_C);
524
525 time0=Paso_timer();
526
527 /* x_0 = P*x_c */
528 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,0.,x0);
529
530 /* x=x+x0 */
531 #pragma omp parallel for private(i,j) schedule(static)
532 for (i=0;i<amg->n;++i) {
533 for (j=0;j<amg->n_block;++j) {
534 x[i*amg->n_block+j]+=x0[i*amg->n_block+j];
535 }
536 }
537
538 /*postsmoothing*/
539
540 /*solve Ax=b with initial guess x */
541 time0=Paso_timer();
542 Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother, x, b, post_sweeps, TRUE);
543 time0=Paso_timer()-time0;
544 if (timing) fprintf(stdout,"timing: Postsmoothing: %e\n",time0);
545
546 /*end of postsmoothing*/
547
548 }
549 MEMFREE(r);
550 MEMFREE(x0);
551
552 return;
553 }

  ViewVC Help
Powered by ViewVC 1.1.26