/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3323 - (hide annotations)
Thu Oct 28 09:53:46 2010 UTC (8 years, 11 months ago) by gross
File MIME type: text/plain
File size: 16926 byte(s)
some openmp fizes.
1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AMG preconditioner */
18    
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33    
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38     void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
39     if (in!=NULL) {
40     Paso_Preconditioner_LocalSmoother_free(in->Smoother);
41 gross 3283 Paso_SparseMatrix_free(in->P);
42     Paso_SparseMatrix_free(in->R);
43     Paso_SparseMatrix_free(in->A_C);
44     Paso_Preconditioner_LocalAMG_free(in->AMG_C);
45     MEMFREE(in->r);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48 gross 3193
49 gross 3283
50     MEMFREE(in);
51 gross 3193 }
52     }
53    
54 gross 3283 /*****************************************************************
55 gross 3193
56 gross 3283 constructs AMG
57    
58     ******************************************************************/
59     Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
60 gross 3193
61     Paso_Preconditioner_LocalAMG* out=NULL;
62     bool_t verbose=options->verbose;
63    
64 gross 3303 Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;
65 gross 3193 const dim_t n=A_p->numRows;
66     const dim_t n_block=A_p->row_block_size;
67 gross 3303 index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;
68 gross 3283 dim_t n_F=0, n_C=0, i;
69     double time0=0;
70     const double theta = options->coarsening_threshold;
71     const double tau = options->diagonal_dominance_threshold;
72 gross 3193
73    
74     /*
75 gross 3283 is the input matrix A suitable for coarsening
76    
77 gross 3193 */
78 gross 3283 if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
79 gross 3323 if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
80 gross 3283 level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size );
81 gross 3193 return NULL;
82 gross 3283 }
83     /* Start Coarsening : */
84 gross 3193
85 gross 3283 split_marker=TMPMEMALLOC(n,index_t);
86     counter=TMPMEMALLOC(n,index_t);
87 gross 3303 degree=TMPMEMALLOC(n, dim_t);
88     S=TMPMEMALLOC(A_p->pattern->len, index_t);
89     if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
90 gross 3283 /*
91     set splitting of unknows:
92    
93     */
94     time0=Esys_timer();
95     if (n_block>1) {
96 gross 3303 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);
97 gross 3283 } else {
98 gross 3303 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);
99 gross 3283 }
100 gross 3303 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker);
101 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
102    
103     if (Esys_noError() ) {
104     #pragma omp parallel for private(i) schedule(static)
105     for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);
106    
107     /*
108     count number of unkowns to be eliminated:
109     */
110     n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
111     n_C=n-n_F;
112 gross 3323 if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
113 gross 3283
114     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
115     out = NULL;
116     } else {
117     out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
118 gross 3323 if (! Esys_checkPtr(out)) {
119 gross 3283 out->level = level;
120     out->n = n;
121     out->n_F = n_F;
122     out->n_block = n_block;
123     out->A_C = NULL;
124     out->P = NULL;
125     out->R = NULL;
126     out->post_sweeps = options->post_sweeps;
127     out->pre_sweeps = options->pre_sweeps;
128     out->r = NULL;
129     out->x_C = NULL;
130     out->b_C = NULL;
131     out->AMG_C = NULL;
132 gross 3323 out->Smoother=NULL;
133     }
134     mask_C=TMPMEMALLOC(n,index_t);
135     rows_in_F=TMPMEMALLOC(n_F,index_t);
136     Esys_checkPtr(mask_C);
137     Esys_checkPtr(rows_in_F);
138     if ( Esys_noError() ) {
139    
140 gross 3283 out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
141 gross 3323
142 gross 3283 if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
143    
144 gross 3315 /* allocate helpers :*/
145     out->x_C=MEMALLOC(n_block*n_C,double);
146     out->b_C=MEMALLOC(n_block*n_C,double);
147     out->r=MEMALLOC(n_block*n,double);
148    
149     Esys_checkPtr(out->r);
150     Esys_checkPtr(out->x_C);
151     Esys_checkPtr(out->b_C);
152    
153 gross 3323 if ( Esys_noError() ) {
154     /* creates index for F:*/
155     #pragma omp parallel private(i)
156     {
157     #pragma omp for schedule(static)
158     for (i = 0; i < n; ++i) {
159     if (split_marker[i]) rows_in_F[counter[i]]=i;
160     }
161     }
162     /* create mask of C nodes with value >-1 gives new id */
163     i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
164 gross 3193
165 gross 3323 #pragma omp parallel for private(i) schedule(static)
166     for (i = 0; i < n; ++i) {
167     if (split_marker[i]) {
168     mask_C[i]=-1;
169     } else {
170     mask_C[i]=counter[i];;
171     }
172 gross 3283 }
173 gross 3323 /*
174     get Restriction :
175     */
176     time0=Esys_timer();
177     out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);
178     if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
179 gross 3283 }
180 gross 3323 /*
181 gross 3283 construct Prolongation operator as transposed of restriction operator:
182     */
183 gross 3315 if ( Esys_noError()) {
184     time0=Esys_timer();
185     out->R=Paso_SparseMatrix_getTranspose(out->P);
186     if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
187     }
188 gross 3283 /*
189 gross 3312 construct coarse level matrix:
190 gross 3283 */
191 gross 3315 if ( Esys_noError()) {
192     time0=Esys_timer();
193     Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
194     A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
195     Paso_SparseMatrix_free(Atemp);
196     if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
197     }
198    
199 gross 3283
200     /*
201     constructe courser level:
202    
203     */
204     if ( Esys_noError()) {
205 gross 3315 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
206     }
207     if ( Esys_noError()) {
208 gross 3283 if ( out->AMG_C == NULL ) {
209     out->reordering = options->reordering;
210     out->refinements = options->coarse_matrix_refinements;
211     /* no coarse level matrix has been constructed. use direct solver */
212     #ifdef MKL
213 gross 3312 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
214 gross 3283 Paso_SparseMatrix_free(A_C);
215     out->A_C->solver_package = PASO_MKL;
216 gross 3323 if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
217 gross 3283 #else
218     #ifdef UMFPACK
219 gross 3312 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
220 gross 3283 Paso_SparseMatrix_free(A_C);
221     out->A_C->solver_package = PASO_UMFPACK;
222 gross 3323 if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
223 gross 3283 #else
224     out->A_C=A_C;
225     out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
226     out->A_C->solver_package = PASO_SMOOTHER;
227 gross 3323 if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);
228 gross 3283 #endif
229     #endif
230     } else {
231     /* finally we set some helpers for the solver step */
232     out->A_C=A_C;
233     }
234     }
235     }
236     }
237     TMPMEMFREE(mask_C);
238     TMPMEMFREE(rows_in_F);
239     }
240     }
241 gross 3193 }
242     TMPMEMFREE(counter);
243 gross 3283 TMPMEMFREE(split_marker);
244 gross 3303 TMPMEMFREE(degree);
245     TMPMEMFREE(S);
246 gross 3193
247 jfenwick 3259 if (Esys_noError()) {
248 gross 3193 return out;
249     } else {
250     Paso_Preconditioner_LocalAMG_free(out);
251     return NULL;
252     }
253     }
254    
255    
256 gross 3283 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
257     const dim_t n = amg->n * amg->n_block;
258     double time0=0;
259     const dim_t post_sweeps=amg->post_sweeps;
260     const dim_t pre_sweeps=amg->pre_sweeps;
261 gross 3193
262 gross 3283 /* presmoothing */
263     time0=Esys_timer();
264     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
265     time0=Esys_timer()-time0;
266     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
267     /* end of presmoothing */
268    
269     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
270     time0=Esys_timer();
271     Paso_Copy(n, amg->r, b); /* r <- b */
272 gross 3312 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
273     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
274 gross 3283 time0=Esys_timer()-time0;
275     /* coarse level solve */
276     if ( amg->AMG_C == NULL) {
277     time0=Esys_timer();
278     /* A_C is the coarsest level */
279     switch (amg->A_C->solver_package) {
280     case (PASO_MKL):
281     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
282     break;
283     case (PASO_UMFPACK):
284     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
285     break;
286     case (PASO_SMOOTHER):
287     Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);
288     break;
289     }
290     if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
291     } else {
292     Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
293     }
294     time0=time0+Esys_timer();
295 gross 3303 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
296 gross 3283
297     /*postsmoothing*/
298    
299     /*solve Ax=b with initial guess x */
300     time0=Esys_timer();
301     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
302     time0=Esys_timer()-time0;
303     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
304     /*end of postsmoothing*/
305    
306     }
307     return;
308     }
309 gross 3193
310 gross 3283 /* theta = threshold for strong connections */
311     /* tau = threshold for diagonal dominance */
312 gross 3193
313 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
314    
315     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
316     */
317    
318     void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,
319     dim_t *degree, index_t *S,
320     const double theta, const double tau)
321 gross 3283 {
322     const dim_t n=A->numRows;
323 gross 3303 index_t iptr, i,j;
324 gross 3283 dim_t kdeg;
325     double max_offdiagonal, threshold, sum_row, main_row, fnorm;
326 gross 3193
327 gross 3303
328 gross 3283 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
329     for (i=0;i<n;++i) {
330 gross 3193
331 gross 3283 max_offdiagonal = 0.;
332     sum_row=0;
333     main_row=0;
334     #pragma ivdep
335     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
336     j=A->pattern->index[iptr];
337     fnorm=ABS(A->val[iptr]);
338    
339     if( j != i) {
340     max_offdiagonal = MAX(max_offdiagonal,fnorm);
341     sum_row+=fnorm;
342     } else {
343     main_row=fnorm;
344     }
345     }
346     threshold = theta*max_offdiagonal;
347     kdeg=0;
348     if (tau*main_row < sum_row) { /* no diagonal domainance */
349     #pragma ivdep
350     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
351     j=A->pattern->index[iptr];
352     if(ABS(A->val[iptr])>threshold && i!=j) {
353     S[A->pattern->ptr[i]+kdeg] = j;
354     kdeg++;
355     }
356     }
357     }
358     degree[i]=kdeg;
359     }
360 gross 3303
361 gross 3283 }
362 gross 3193
363 gross 3283 /* theta = threshold for strong connections */
364     /* tau = threshold for diagonal dominance */
365 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
366 gross 3283
367 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
368     */
369     void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,
370     dim_t *degree, index_t *S,
371     const double theta, const double tau)
372    
373 gross 3283 {
374     const dim_t n_block=A->row_block_size;
375     const dim_t n=A->numRows;
376 gross 3303 index_t iptr, i,j, bi;
377 gross 3283 dim_t kdeg, max_deg;
378     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
379     double *rtmp;
380    
381    
382     #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
383     {
384     max_deg=0;
385     #pragma omp for schedule(static)
386     for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
387 gross 3193
388 gross 3283 rtmp=TMPMEMALLOC(max_deg, double);
389    
390     #pragma omp for schedule(static)
391     for (i=0;i<n;++i) {
392 gross 3193
393 gross 3283 max_offdiagonal = 0.;
394     sum_row=0;
395     main_row=0;
396     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
397     j=A->pattern->index[iptr];
398     fnorm=0;
399     #pragma ivdep
400     for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
401     fnorm=sqrt(fnorm);
402 gross 3287 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
403 gross 3283 if( j != i) {
404     max_offdiagonal = MAX(max_offdiagonal,fnorm);
405     sum_row+=fnorm;
406     } else {
407     main_row=fnorm;
408     }
409     }
410     threshold = theta*max_offdiagonal;
411 gross 3193
412 gross 3283 kdeg=0;
413     if (tau*main_row < sum_row) { /* no diagonal domainance */
414     #pragma ivdep
415     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
416     j=A->pattern->index[iptr];
417     if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
418     S[A->pattern->ptr[i]+kdeg] = j;
419     kdeg++;
420     }
421     }
422     }
423     degree[i]=kdeg;
424     }
425     TMPMEMFREE(rtmp);
426     } /* end of parallel region */
427 gross 3303
428 gross 3283 }
429    
430     /* the runge stueben coarsening algorithm: */
431 gross 3303 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,
432     const dim_t* degree, const index_t* S,
433 gross 3283 index_t*split_marker)
434     {
435     index_t *lambda=NULL, j, *ST=NULL;
436     dim_t i,k, p, q, *degreeT=NULL, itmp;
437 gross 3323 double time0=0;
438 gross 3283
439     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
440    
441     lambda=TMPMEMALLOC(n, index_t);
442     degreeT=TMPMEMALLOC(n, dim_t);
443     ST=TMPMEMALLOC(offset[n], index_t);
444    
445     if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {
446 gross 3323 time0=Esys_timer();
447    
448 gross 3283 /* initialize split_marker and split_marker :*/
449     /* those unknows which are not influenced go into F, the rest is available for F or C */
450     #pragma omp parallel for private(i) schedule(static)
451     for (i=0;i<n;++i) {
452     degreeT[i]=0;
453     if (degree[i]>0) {
454     lambda[i]=0;
455     split_marker[i]=PASO_AMG_UNDECIDED;
456     } else {
457     split_marker[i]=PASO_AMG_IN_F;
458     lambda[i]=-1;
459     }
460     }
461     /* create transpose :*/
462     for (i=0;i<n;++i) {
463     for (p=0; p<degree[i]; ++p) {
464     j=S[offset[i]+p];
465     ST[offset[j]+degreeT[j]]=i;
466     degreeT[j]++;
467     }
468     }
469     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
470     #pragma omp parallel for private(i, j, itmp) schedule(static)
471     for (i=0;i<n;++i) {
472     if (split_marker[i]==PASO_AMG_UNDECIDED) {
473     itmp=lambda[i];
474     for (p=0; p<degreeT[i]; ++p) {
475     j=ST[offset[i]+p];
476     if (split_marker[j]==PASO_AMG_UNDECIDED) {
477     itmp++;
478     } else { /* at this point there are no C points */
479     itmp+=2;
480     }
481     }
482     lambda[i]=itmp;
483     }
484     }
485 gross 3323 /* printf("timing: %e\n",Esys_timer()-time0); */
486     time0=Esys_timer();
487 gross 3283 /* start search :*/
488     i=Paso_Util_arg_max(n,lambda);
489     while (lambda[i]>-1) { /* is there any undecided unknowns? */
490 gross 3193
491 gross 3283 /* the unknown i is moved to C */
492     split_marker[i]=PASO_AMG_IN_C;
493     lambda[i]=-1; /* lambda fro unavailable unknowns is set to -1 */
494    
495     /* all undecided unknown strongly coupled to i are moved to F */
496     for (p=0; p<degreeT[i]; ++p) {
497     j=ST[offset[i]+p];
498    
499     if (split_marker[j]==PASO_AMG_UNDECIDED) {
500    
501     split_marker[j]=PASO_AMG_IN_F;
502     lambda[j]=-1;
503    
504     for (q=0; q<degreeT[j]; ++q) {
505     k=ST[offset[j]+q];
506     if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
507     }
508 gross 3193
509 gross 3283 }
510     }
511     for (p=0; p<degree[i]; ++p) {
512     j=S[offset[i]+p];
513     if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
514     }
515    
516     i=Paso_Util_arg_max(n,lambda);
517     }
518     }
519 gross 3323 /* printf("timing: %e\n",Esys_timer()-time0); */
520 gross 3283 TMPMEMFREE(lambda);
521     TMPMEMFREE(ST);
522     TMPMEMFREE(degreeT);
523 gross 3193 }

  ViewVC Help
Powered by ViewVC 1.1.26