/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory | Revision Log | View Patch

revision 3315 by gross, Wed Oct 27 01:20:27 2010 UTC revision 3403 by gross, Tue Dec 7 08:13:51 2010 UTC
# Line 51  void Paso_Preconditioner_LocalAMG_free(P Line 51  void Paso_Preconditioner_LocalAMG_free(P
51       }       }
52  }  }
53    
/* Returns the `level` field of the last preconditioner in the chain linked
 * through AMG_C, i.e. the one whose AMG_C pointer is NULL.
 *
 * in: preconditioner to start from; it is dereferenced without a NULL check,
 *     so the caller must pass a valid pointer.
 */
54    index_t Paso_Preconditioner_LocalAMG_getMaxLevel(const Paso_Preconditioner_LocalAMG * in) {
55       if (in->AMG_C == NULL) {   /* no further level attached: this is the end of the chain */
56          return in->level;
57       } else {
58          return Paso_Preconditioner_LocalAMG_getMaxLevel(in->AMG_C);   /* recurse into the next level */
59       }
60    }
/* Returns a sparsity figure for the matrix A_C held by the last preconditioner
 * in the chain linked through AMG_C (the one whose AMG_C pointer is NULL).
 *
 * The figure is A_C->pattern->len divided twice by A_C->numRows; if that last
 * level has no A_C the function returns 1.
 * NOTE(review): presumably pattern->len is the number of stored entries and
 * DBLE() converts to double - confirm against their definitions.
 *
 * in: preconditioner to start from; dereferenced without a NULL check.
 */
61    double Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(const Paso_Preconditioner_LocalAMG * in) {
62          if (in->AMG_C == NULL) {
63         if (in->A_C == NULL) {
64            return 1.;   /* no coarse matrix stored on this level */
65         } else {
66            return DBLE(in->A_C->pattern->len)/DBLE(in->A_C->numRows)/DBLE(in->A_C->numRows);
67         }
68          } else {
69            return Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(in->AMG_C);   /* recurse into the next level */
70          }
71    }
/* Returns A_C->numRows of the last preconditioner in the chain linked through
 * AMG_C (the one whose AMG_C pointer is NULL), or 0 if that level has no A_C.
 *
 * in: preconditioner to start from; dereferenced without a NULL check.
 *
 * The spelling "Unknwons" in the function name is part of the existing
 * interface and is kept unchanged here.
 */
72    dim_t Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(const Paso_Preconditioner_LocalAMG * in) {
73       if (in->AMG_C == NULL) {
74          if (in->A_C == NULL) {
75         return 0;   /* no coarse matrix stored on this level */
76          } else {
77         return in->A_C->numRows;
78          }
79       } else {
80         return Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(in->AMG_C);   /* recurse into the next level */
81       }
82    }
83  /*****************************************************************  /*****************************************************************
84    
85     constructs AMG     constructs AMG
# Line 76  Paso_Preconditioner_LocalAMG* Paso_Preco Line 105  Paso_Preconditioner_LocalAMG* Paso_Preco
105                
106    */    */
107    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
108       if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",       if (verbose) printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
109      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );        level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );  
110       return NULL;       return NULL;
111    }    }
# Line 97  Paso_Preconditioner_LocalAMG* Paso_Preco Line 126  Paso_Preconditioner_LocalAMG* Paso_Preco
126       } else {       } else {
127             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);
128       }       }
129       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker);       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);
130       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
131            
132       if (Esys_noError() ) {       if (Esys_noError() ) {
# Line 109  Paso_Preconditioner_LocalAMG* Paso_Preco Line 138  Paso_Preconditioner_LocalAMG* Paso_Preco
138          */          */
139          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
140          n_C=n-n_F;          n_C=n-n_F;
141          if (verbose) printf("Paso: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
142            
143          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
144             out = NULL;             out = NULL;
145          } else {          } else {
146             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
147             mask_C=TMPMEMALLOC(n,index_t);             if (! Esys_checkPtr(out)) {
            rows_in_F=TMPMEMALLOC(n_F,index_t);  
            if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) {  
148            out->level = level;            out->level = level;
149            out->n = n;            out->n = n;
150            out->n_F = n_F;            out->n_F = n_F;
# Line 131  Paso_Preconditioner_LocalAMG* Paso_Preco Line 158  Paso_Preconditioner_LocalAMG* Paso_Preco
158            out->x_C = NULL;            out->x_C = NULL;
159            out->b_C = NULL;            out->b_C = NULL;
160            out->AMG_C = NULL;            out->AMG_C = NULL;
161              out->Smoother=NULL;
162               }
163               mask_C=TMPMEMALLOC(n,index_t);
164               rows_in_F=TMPMEMALLOC(n_F,index_t);
165               Esys_checkPtr(mask_C);
166               Esys_checkPtr(rows_in_F);
167               if ( Esys_noError() ) {
168    
169            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
170                    
171            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if (n_C != 0) {
172                   /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
173        
174              /* allocate helpers :*/              /* allocate helpers :*/
175              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*n_C,double);
# Line 141  Paso_Preconditioner_LocalAMG* Paso_Preco Line 177  Paso_Preconditioner_LocalAMG* Paso_Preco
177              out->r=MEMALLOC(n_block*n,double);              out->r=MEMALLOC(n_block*n,double);
178                            
179              Esys_checkPtr(out->r);              Esys_checkPtr(out->r);
             Esys_checkPtr(out->Smoother);  
180              Esys_checkPtr(out->x_C);              Esys_checkPtr(out->x_C);
181              Esys_checkPtr(out->b_C);              Esys_checkPtr(out->b_C);
182                            
183              /* creates index for F:*/              if ( Esys_noError() ) {
184              #pragma omp parallel for private(i) schedule(static)                 /* creates index for F:*/
185              for (i = 0; i < n; ++i) {                 #pragma omp parallel private(i)
186                 if  (split_marker[i]) rows_in_F[counter[i]]=i;                 {
187              }                              #pragma omp for schedule(static)
188              /*  create mask of C nodes with value >-1 gives new id */                    for (i = 0; i < n; ++i) {
189              i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);                   if  (split_marker[i]) rows_in_F[counter[i]]=i;
190                      }
             #pragma omp parallel for private(i) schedule(static)  
             for (i = 0; i < n; ++i) {  
                if  (split_marker[i]) {  
                   mask_C[i]=-1;  
                } else {  
                   mask_C[i]=counter[i];;  
191                 }                 }
192              }                 /*  create mask of C nodes with value >-1 gives new id */
193              /*                 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
194    
195                   #pragma omp parallel for private(i) schedule(static)
196                   for (i = 0; i < n; ++i) {
197                      if  (split_marker[i]) {
198                     mask_C[i]=-1;
199                      } else {
200                     mask_C[i]=counter[i];;
201                      }
202                   }
203                   /*
204                    get Restriction :                      get Restriction :  
205              */                                   */                  
206              time0=Esys_timer();                 time0=Esys_timer();
207              out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);
208              if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
209      /*                    }
210                /*      
211                 construct Prolongation operator as transposed of restriction operator:                 construct Prolongation operator as transposed of restriction operator:
212              */              */
213              if ( Esys_noError()) {              if ( Esys_noError()) {
# Line 203  Paso_Preconditioner_LocalAMG* Paso_Preco Line 243  Paso_Preconditioner_LocalAMG* Paso_Preco
243                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
244                      Paso_SparseMatrix_free(A_C);                      Paso_SparseMatrix_free(A_C);
245                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
246                      if (verbose) printf("Paso: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
247                    #else                    #else
248                      #ifdef UMFPACK                      #ifdef UMFPACK
249                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
250                         Paso_SparseMatrix_free(A_C);                         Paso_SparseMatrix_free(A_C);
251                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
252                         if (verbose) printf("Paso: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
253                      #else                      #else
254                         out->A_C=A_C;                         out->A_C=A_C;
255                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
256                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
257                         if (verbose) printf("Paso: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
258                      #endif                      #endif
259                    #endif                    #endif
260                 } else {                 } else {
# Line 274  void Paso_Preconditioner_LocalAMG_solve( Line 314  void Paso_Preconditioner_LocalAMG_solve(
314            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
315            break;            break;
316             case (PASO_SMOOTHER):             case (PASO_SMOOTHER):
317            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
318            break;            break;
319          }          }
320          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
# Line 316  void Paso_Preconditioner_AMG_setStrongCo Line 356  void Paso_Preconditioner_AMG_setStrongCo
356    
357    
358        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
359        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {        
             
360       max_offdiagonal = 0.;       max_offdiagonal = 0.;
361       sum_row=0;       sum_row=0;
362       main_row=0;       main_row=0;
# Line 335  void Paso_Preconditioner_AMG_setStrongCo Line 374  void Paso_Preconditioner_AMG_setStrongCo
374       }       }
375       threshold = theta*max_offdiagonal;       threshold = theta*max_offdiagonal;
376       kdeg=0;       kdeg=0;
377        
378       if (tau*main_row < sum_row) { /* no diagonal domainance */       if (tau*main_row < sum_row) { /* no diagonal domainance */
379          #pragma ivdep          #pragma ivdep
380          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
# Line 419  void Paso_Preconditioner_AMG_setStrongCo Line 459  void Paso_Preconditioner_AMG_setStrongCo
459    
460  /* the runge stueben coarsening algorithm: */  /* the runge stueben coarsening algorithm: */
461  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,
462                           const dim_t* degree, const index_t* S,                          const dim_t* degree, const index_t* S,
463                           index_t*split_marker)                          index_t*split_marker, const bool_t usePanel)
464  {  {
465     index_t *lambda=NULL, j, *ST=NULL;    
466     dim_t i,k, p, q, *degreeT=NULL, itmp;     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
467       dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;
468       register index_t j, itmp;
469        
470     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
471        
472     lambda=TMPMEMALLOC(n, index_t);     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
473     degreeT=TMPMEMALLOC(n, dim_t);     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);
474     ST=TMPMEMALLOC(offset[n], index_t);     ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);
475       if (usePanel) {
476          notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
477          panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
478       }
479      
480        
481     if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {    
482       if (Esys_noError() ) {
483        /* initialize  split_marker and split_marker :*/        /* initialize  split_marker and split_marker :*/
484        /* those unknows which are not influenced go into F, the rest is available for F or C */        /* those unknows which are not influenced go into F, the rest is available for F or C */
485        #pragma omp parallel for private(i) schedule(static)        #pragma omp parallel for private(i) schedule(static)
# Line 469  void Paso_Preconditioner_AMG_RungeStuebe Line 517  void Paso_Preconditioner_AMG_RungeStuebe
517          lambda[i]=itmp;          lambda[i]=itmp;
518       }       }
519        }        }
520          if (usePanel) {
521         #pragma omp parallel for private(i) schedule(static)
522         for (i=0;i<n;++i) notInPanel[i]=TRUE;
523          }
524        /* start search :*/        /* start search :*/
525        i=Paso_Util_arg_max(n,lambda);        i=Paso_Util_arg_max(n,lambda);
526        while (lambda[i]>-1) { /* is there any undecided unknowns? */        while (lambda[i]>-1) { /* is there any undecided unknown? */
527        
528       /* the unknown i is moved to C */       if (usePanel) {
529       split_marker[i]=PASO_AMG_IN_C;          len_panel=0;
530       lambda[i]=-1;  /* lambda fro unavailable unknowns is set to -1 */          do {
531                   /* the unknown i is moved to C */
532       /* all undecided unknown strongly coupled to i are moved to F */             split_marker[i]=PASO_AMG_IN_C;
533       for (p=0; p<degreeT[i]; ++p) {             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
534          j=ST[offset[i]+p];            
535                   /* all undecided unknown strongly coupled to i are moved to F */
536          if (split_marker[j]==PASO_AMG_UNDECIDED) {             for (p=0; p<degreeT[i]; ++p) {
537                    j=ST[offset[i]+p];
538             split_marker[j]=PASO_AMG_IN_F;            
539             lambda[j]=-1;            if (split_marker[j]==PASO_AMG_UNDECIDED) {
540                      
541             for (q=0; q<degreeT[j]; ++q) {               split_marker[j]=PASO_AMG_IN_F;
542            k=ST[offset[j]+q];               lambda[j]=-1;
543            if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;              
544                 for (q=0; q<degreeT[j]; ++q) {
545                k=ST[offset[j]+q];
546                if (split_marker[k]==PASO_AMG_UNDECIDED) {
547                   lambda[k]++;
548                   if (notInPanel[k]) {
549                      notInPanel[k]=FALSE;
550                      panel[len_panel]=k;
551                      len_panel++;
552                   }
553    
554                }    /* the unknown i is moved to C */
555                split_marker[i]=PASO_AMG_IN_C;
556                lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
557                 }
558                
559              }
560               }
561               for (p=0; p<degree[i]; ++p) {
562              j=S[offset[i]+p];
563              if (split_marker[j]==PASO_AMG_UNDECIDED) {
564                 lambda[j]--;
565                 if (notInPanel[j]) {
566                notInPanel[j]=FALSE;
567                panel[len_panel]=j;
568                len_panel++;
569                 }
570              }
571               }
572    
573               /* consolidate panel */
574               /* remove lambda[q]=-1 */
575               lambda_max=-1;
576               i=-1;
577               len_panel_new=0;
578               for (q=0; q<len_panel; q++) {
579                 k=panel[q];
580                 lambda_k=lambda[k];
581                 if (split_marker[k]==PASO_AMG_UNDECIDED) {
582                panel[len_panel_new]=k;
583                len_panel_new++;
584    
585                if (lambda_max == lambda_k) {
586                   if (k<i) i=k;
587                } else if (lambda_max < lambda_k) {
588                   lambda_max =lambda_k;
589                   i=k;
590                }
591                 }
592             }             }
593               len_panel=len_panel_new;
594            } while (len_panel>0);    
595         } else {
596            /* the unknown i is moved to C */
597            split_marker[i]=PASO_AMG_IN_C;
598            lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
599            
600            /* all undecided unknown strongly coupled to i are moved to F */
601            for (p=0; p<degreeT[i]; ++p) {
602               j=ST[offset[i]+p];
603               if (split_marker[j]==PASO_AMG_UNDECIDED) {
604            
605              split_marker[j]=PASO_AMG_IN_F;
606              lambda[j]=-1;
607            
608              for (q=0; q<degreeT[j]; ++q) {
609                 k=ST[offset[j]+q];
610                 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
611              }
612    
613               }
614          }          }
615            for (p=0; p<degree[i]; ++p) {
616               j=S[offset[i]+p];
617               if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
618            }
619            
620       }       }
      for (p=0; p<degree[i]; ++p) {  
         j=S[offset[i]+p];  
         if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;  
      }  
       
621       i=Paso_Util_arg_max(n,lambda);       i=Paso_Util_arg_max(n,lambda);
622        }        }
623     }     }
624     TMPMEMFREE(lambda);     TMPMEMFREE(lambda);
625     TMPMEMFREE(ST);     TMPMEMFREE(ST);
626     TMPMEMFREE(degreeT);     TMPMEMFREE(degreeT);
627       TMPMEMFREE(panel);
628       TMPMEMFREE(notInPanel);
629  }  }

Legend:
Removed from v.3315  
changed lines
  Added in v.3403

  ViewVC Help
Powered by ViewVC 1.1.26