/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3283 - (hide annotations)
Mon Oct 18 22:39:28 2010 UTC (8 years, 11 months ago) by gross
File MIME type: text/plain
File size: 17464 byte(s)
AMG reengineered, BUG is Smoother fixed.


1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AMG preconditioner */
18    
19     /**************************************************************/
20    
21     /* Author: artak@uq.edu.au */
22    
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33    
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38     void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
39     if (in!=NULL) {
40     Paso_Preconditioner_LocalSmoother_free(in->Smoother);
41 gross 3283 Paso_SparseMatrix_free(in->P);
42     Paso_SparseMatrix_free(in->R);
43     Paso_SparseMatrix_free(in->A_C);
44     Paso_Preconditioner_LocalAMG_free(in->AMG_C);
45     MEMFREE(in->r);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48 gross 3193
49 gross 3283
50     MEMFREE(in);
51 gross 3193 }
52     }
53    
54 gross 3283 /*****************************************************************
55 gross 3193
56 gross 3283 constructs AMG
57    
58     ******************************************************************/
59     Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
60 gross 3193
61     Paso_Preconditioner_LocalAMG* out=NULL;
62     bool_t verbose=options->verbose;
63    
64 gross 3283 Paso_SparseMatrix* W_FC=NULL, *Atemp=NULL, *A_C=NULL;
65 gross 3193 const dim_t n=A_p->numRows;
66     const dim_t n_block=A_p->row_block_size;
67 gross 3283 index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL;
68     dim_t n_F=0, n_C=0, i;
69     double time0=0;
70     const double theta = options->coarsening_threshold;
71     const double tau = options->diagonal_dominance_threshold;
72 gross 3193
73    
74     /*
75 gross 3283 is the input matrix A suitable for coarsening
76    
77 gross 3193 */
78 gross 3283 if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
79     if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
80     level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size );
81 gross 3193 return NULL;
82 gross 3283 }
83     /* Start Coarsening : */
84 gross 3193
85 gross 3283 split_marker=TMPMEMALLOC(n,index_t);
86     counter=TMPMEMALLOC(n,index_t);
87     if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) ) ) {
88     /*
89     set splitting of unknows:
90    
91     */
92     time0=Esys_timer();
93     if (n_block>1) {
94     Paso_Preconditioner_AMG_RSCoarsening_Block(A_p, split_marker, theta,tau);
95     } else {
96     Paso_Preconditioner_AMG_RSCoarsening(A_p, split_marker, theta,tau);
97     }
98     options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
99    
100     if (Esys_noError() ) {
101     #pragma omp parallel for private(i) schedule(static)
102     for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);
103    
104     /*
105     count number of unkowns to be eliminated:
106     */
107     n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
108     n_C=n-n_F;
109     if (verbose) printf("Paso AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
110    
111     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
112     out = NULL;
113     } else {
114     out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
115     mask_C=TMPMEMALLOC(n,index_t);
116     rows_in_F=TMPMEMALLOC(n_F,index_t);
117     if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) {
118     out->level = level;
119     out->n = n;
120     out->n_F = n_F;
121     out->n_block = n_block;
122     out->A_C = NULL;
123     out->P = NULL;
124     out->R = NULL;
125     out->post_sweeps = options->post_sweeps;
126     out->pre_sweeps = options->pre_sweeps;
127     out->r = NULL;
128     out->x_C = NULL;
129     out->b_C = NULL;
130     out->AMG_C = NULL;
131     out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
132    
133     if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
134    
135     /* creates index for F:*/
136     #pragma omp parallel for private(i) schedule(static)
137     for (i = 0; i < n; ++i) {
138     if (split_marker[i]) rows_in_F[counter[i]]=i;
139     }
140     /* create mask of C nodes with value >-1 gives new id */
141     i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
142 gross 3193
143 gross 3283 #pragma omp parallel for private(i) schedule(static)
144     for (i = 0; i < n; ++i) {
145     if (split_marker[i]) {
146     mask_C[i]=-1;
147     } else {
148     mask_C[i]=counter[i];;
149     }
150     }
151     /*
152     get Restriction : (can we do this in one go?)
153     */
154     time0=Esys_timer();
155     W_FC=Paso_SparseMatrix_getSubmatrix(A_p, n_F, n_C, rows_in_F, mask_C);
156     if (SHOW_TIMING) printf("timing: level %d: get Weights: %e\n",level, Esys_timer()-time0);
157    
158     time0=Esys_timer();
159     Paso_SparseMatrix_updateWeights(A_p,W_FC,split_marker);
160     if (SHOW_TIMING) printf("timing: level %d: updateWeights: %e\n",level, Esys_timer()-time0);
161    
162     time0=Esys_timer();
163     out->P=Paso_SparseMatrix_getProlongation(W_FC,split_marker);
164     if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
165    
166     Paso_SparseMatrix_free(W_FC);
167     /*
168     construct Prolongation operator as transposed of restriction operator:
169     */
170     time0=Esys_timer();
171     out->R=Paso_SparseMatrix_getTranspose(out->P);
172     if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
173     /*
174     constrPaso AMG level 2: 4 unknowns are flagged for elimination.
175     timing: Gauss-Seidel preparation: elemination : 8.096400e-05
176     AMG: level 2: 4 unknowns eliminated.
177     uct coarse level matrix (can we do this in one call?)
178     */
179     time0=Esys_timer();
180     Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
181     A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
182     Paso_SparseMatrix_free(Atemp);
183     /*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/
184     if (SHOW_TIMING) printf("timing: level %d : getCoarseMatrix: %e\n",level,Esys_timer()-time0);
185    
186     /* allocate helpers :*/
187     out->x_C=MEMALLOC(n_block*n_C,double);
188     out->b_C=MEMALLOC(n_block*n_C,double);
189     out->r=MEMALLOC(n_block*n,double);
190    
191     Esys_checkPtr(out->r);
192     Esys_checkPtr(out->Smoother);
193     Esys_checkPtr(out->x_C);
194     Esys_checkPtr(out->b_C);
195    
196     /*
197     constructe courser level:
198    
199     */
200     out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
201    
202     if ( Esys_noError()) {
203     if ( out->AMG_C == NULL ) {
204     out->reordering = options->reordering;
205     out->refinements = options->coarse_matrix_refinements;
206     /* no coarse level matrix has been constructed. use direct solver */
207     #ifdef MKL
208     Atemp=Paso_SparseMatrix_unroll(A_C);
209     Paso_SparseMatrix_free(A_C);
210     out->A_C=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, Atemp->pattern,1,1, FALSE);
211     #pragma omp parallel for private(i) schedule(static)
212     for (i=0;i<out->A->len;++i) {
213     out->A_C->val[i]=Atemp->val[i];
214     }
215     Paso_SparseMatrix_free(Atemp);
216     out->A_C->solver_package = PASO_MKL;
217     #else
218     #ifdef UMFPACK
219     out->A_C=Paso_SparseMatrix_unroll(A_C);
220     Paso_SparseMatrix_free(A_C);
221     out->A_C->solver_package = PASO_UMFPACK;
222     #else
223     out->A_C=A_C;
224     out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
225     out->A_C->solver_package = PASO_SMOOTHER;
226     #endif
227     #endif
228     } else {
229     /* finally we set some helpers for the solver step */
230     out->A_C=A_C;
231     }
232     }
233     }
234     }
235     TMPMEMFREE(mask_C);
236     TMPMEMFREE(rows_in_F);
237     }
238     }
239 gross 3193 }
240     TMPMEMFREE(counter);
241 gross 3283 TMPMEMFREE(split_marker);
242 gross 3193
243 jfenwick 3259 if (Esys_noError()) {
244 gross 3283 if (verbose) printf("AMG: level %d: %d unknowns eliminated.\n",level, n_F);
245 gross 3193 return out;
246     } else {
247     Paso_Preconditioner_LocalAMG_free(out);
248     return NULL;
249     }
250     }
251    
252    
253 gross 3283 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
254     const dim_t n = amg->n * amg->n_block;
255     double time0=0;
256     const dim_t post_sweeps=amg->post_sweeps;
257     const dim_t pre_sweeps=amg->pre_sweeps;
258 gross 3193
259 gross 3283 /* presmoothing */
260     time0=Esys_timer();
261     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
262     time0=Esys_timer()-time0;
263     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
264     /* end of presmoothing */
265    
266     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
267     time0=Esys_timer();
268     Paso_Copy(n, amg->r, b); /* r <- b */
269     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
270     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
271     time0=Esys_timer()-time0;
272     /* coarse level solve */
273     if ( amg->AMG_C == NULL) {
274     time0=Esys_timer();
275     /* A_C is the coarsest level */
276     switch (amg->A_C->solver_package) {
277     case (PASO_MKL):
278     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
279     break;
280     case (PASO_UMFPACK):
281     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
282     break;
283     case (PASO_SMOOTHER):
284     Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);
285     break;
286     }
287     if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
288     } else {
289     Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
290     }
291     time0=time0+Esys_timer();
292     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
293    
294     /*postsmoothing*/
295    
296     /*solve Ax=b with initial guess x */
297     time0=Esys_timer();
298     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
299     time0=Esys_timer()-time0;
300     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
301     /*end of postsmoothing*/
302    
303     }
304     return;
305     }
306 gross 3193
307 gross 3283 /* theta = threshold for strong connections */
308     /* tau = threshold for diagonal dominance */
309 gross 3193
310 gross 3283 void Paso_Preconditioner_AMG_RSCoarsening(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)
311     {
312     const dim_t n=A->numRows;
313     dim_t *degree=NULL; /* number of naighbours in strong connection graph */
314     index_t *S=NULL, iptr, i,j;
315     dim_t kdeg;
316     double max_offdiagonal, threshold, sum_row, main_row, fnorm;
317    
318     degree=TMPMEMALLOC(n, dim_t);
319     S=TMPMEMALLOC(A->pattern->len, index_t);
320 gross 3193
321 gross 3283 if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
322     /*S_i={j \in N_i; i strongly coupled to j}
323    
324     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
325     */
326     #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
327     for (i=0;i<n;++i) {
328 gross 3193
329 gross 3283 max_offdiagonal = 0.;
330     sum_row=0;
331     main_row=0;
332     #pragma ivdep
333     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
334     j=A->pattern->index[iptr];
335     fnorm=ABS(A->val[iptr]);
336    
337     if( j != i) {
338     max_offdiagonal = MAX(max_offdiagonal,fnorm);
339     sum_row+=fnorm;
340     } else {
341     main_row=fnorm;
342     }
343     }
344     threshold = theta*max_offdiagonal;
345     kdeg=0;
346     if (tau*main_row < sum_row) { /* no diagonal domainance */
347     #pragma ivdep
348     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
349     j=A->pattern->index[iptr];
350     if(ABS(A->val[iptr])>threshold && i!=j) {
351     S[A->pattern->ptr[i]+kdeg] = j;
352     kdeg++;
353     }
354     }
355     }
356     degree[i]=kdeg;
357     }
358 gross 3193
359 gross 3283 Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);
360    
361     }
362     TMPMEMFREE(degree);
363     TMPMEMFREE(S);
364     }
365 gross 3193
366 gross 3283 /* theta = threshold for strong connections */
367     /* tau = threshold for diagonal dominance */
368     void Paso_Preconditioner_AMG_RSCoarsening_Block(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)
369    
370     {
371     const dim_t n_block=A->row_block_size;
372     const dim_t n=A->numRows;
373     dim_t *degree=NULL; /* number of naighbours in strong connection graph */
374     index_t *S=NULL, iptr, i,j, bi;
375     dim_t kdeg, max_deg;
376     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
377     double *rtmp;
378    
379    
380     degree=TMPMEMALLOC(n, dim_t);
381     S=TMPMEMALLOC(A->pattern->len, index_t);
382    
383     if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
384     /*S_i={j \in N_i; i strongly coupled to j}
385    
386     in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
387     */
388     #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
389     {
390     max_deg=0;
391     #pragma omp for schedule(static)
392     for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
393 gross 3193
394 gross 3283 rtmp=TMPMEMALLOC(max_deg, double);
395    
396     #pragma omp for schedule(static)
397     for (i=0;i<n;++i) {
398 gross 3193
399 gross 3283 max_offdiagonal = 0.;
400     sum_row=0;
401     main_row=0;
402     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
403     j=A->pattern->index[iptr];
404     fnorm=0;
405     #pragma ivdep
406     for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
407     fnorm=sqrt(fnorm);
408    
409     if( j != i) {
410     rtmp[iptr-A->pattern->ptr[i]]=fnorm;
411     max_offdiagonal = MAX(max_offdiagonal,fnorm);
412     sum_row+=fnorm;
413     } else {
414     main_row=fnorm;
415     }
416     }
417     threshold = theta*max_offdiagonal;
418 gross 3193
419 gross 3283 kdeg=0;
420     if (tau*main_row < sum_row) { /* no diagonal domainance */
421     #pragma ivdep
422     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
423     j=A->pattern->index[iptr];
424     if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
425     S[A->pattern->ptr[i]+kdeg] = j;
426     kdeg++;
427     }
428     }
429     }
430     degree[i]=kdeg;
431     }
432     TMPMEMFREE(rtmp);
433     } /* end of parallel region */
434     Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);
435     }
436     TMPMEMFREE(degree);
437     TMPMEMFREE(S);
438     }
439    
440     /* the runge stueben coarsening algorithm: */
441     void Paso_Preconditioner_AMG_RSCoarsening_search(const dim_t n, const index_t* offset, const dim_t* degree, const index_t* S,
442     index_t*split_marker)
443     {
444     index_t *lambda=NULL, j, *ST=NULL;
445     dim_t i,k, p, q, *degreeT=NULL, itmp;
446    
447     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
448    
449     lambda=TMPMEMALLOC(n, index_t);
450     degreeT=TMPMEMALLOC(n, dim_t);
451     ST=TMPMEMALLOC(offset[n], index_t);
452    
453     if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {
454     /* initialize split_marker and split_marker :*/
455     /* those unknows which are not influenced go into F, the rest is available for F or C */
456     #pragma omp parallel for private(i) schedule(static)
457     for (i=0;i<n;++i) {
458     degreeT[i]=0;
459     if (degree[i]>0) {
460     lambda[i]=0;
461     split_marker[i]=PASO_AMG_UNDECIDED;
462     } else {
463     split_marker[i]=PASO_AMG_IN_F;
464     lambda[i]=-1;
465     }
466     }
467     /* create transpose :*/
468     for (i=0;i<n;++i) {
469     for (p=0; p<degree[i]; ++p) {
470     j=S[offset[i]+p];
471     ST[offset[j]+degreeT[j]]=i;
472     degreeT[j]++;
473     }
474     }
475     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
476     #pragma omp parallel for private(i, j, itmp) schedule(static)
477     for (i=0;i<n;++i) {
478     if (split_marker[i]==PASO_AMG_UNDECIDED) {
479     itmp=lambda[i];
480     for (p=0; p<degreeT[i]; ++p) {
481     j=ST[offset[i]+p];
482     if (split_marker[j]==PASO_AMG_UNDECIDED) {
483     itmp++;
484     } else { /* at this point there are no C points */
485     itmp+=2;
486     }
487     }
488     lambda[i]=itmp;
489     }
490     }
491    
492     /* start search :*/
493     i=Paso_Util_arg_max(n,lambda);
494     while (lambda[i]>-1) { /* is there any undecided unknowns? */
495 gross 3193
496 gross 3283 /* the unknown i is moved to C */
497     split_marker[i]=PASO_AMG_IN_C;
498     lambda[i]=-1; /* lambda fro unavailable unknowns is set to -1 */
499    
500     /* all undecided unknown strongly coupled to i are moved to F */
501     for (p=0; p<degreeT[i]; ++p) {
502     j=ST[offset[i]+p];
503    
504     if (split_marker[j]==PASO_AMG_UNDECIDED) {
505    
506     split_marker[j]=PASO_AMG_IN_F;
507     lambda[j]=-1;
508    
509     for (q=0; q<degreeT[j]; ++q) {
510     k=ST[offset[j]+q];
511     if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
512     }
513 gross 3193
514 gross 3283 }
515     }
516     for (p=0; p<degree[i]; ++p) {
517     j=S[offset[i]+p];
518     if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
519     }
520    
521     i=Paso_Util_arg_max(n,lambda);
522     }
523     }
524     TMPMEMFREE(lambda);
525     TMPMEMFREE(ST);
526     TMPMEMFREE(degreeT);
527 gross 3193 }

  ViewVC Help
Powered by ViewVC 1.1.26