/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3351 by gross, Tue Nov 16 02:39:40 2010 UTC revision 3442 by gross, Tue Jan 18 01:47:36 2011 UTC
# Line 14  Line 14 
14    
15  /**************************************************************/  /**************************************************************/
16    
17  /* Paso: AMG preconditioner                                  */  /* Paso: AMG preconditioner  (local version)                  */
18    
19  /**************************************************************/  /**************************************************************/
20    
# Line 35  Line 35 
35    
36  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
37    
38  void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {  void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39       if (in!=NULL) {       if (in!=NULL) {
40      Paso_Preconditioner_LocalSmoother_free(in->Smoother);      Paso_Preconditioner_Smoother_free(in->Smoother);
41      Paso_SparseMatrix_free(in->P);      Paso_SystemMatrix_free(in->P);
42      Paso_SparseMatrix_free(in->R);      Paso_SystemMatrix_free(in->R);
43      Paso_SparseMatrix_free(in->A_C);      Paso_SystemMatrix_free(in->A_C);
44      Paso_Preconditioner_LocalAMG_free(in->AMG_C);      Paso_Preconditioner_AMG_free(in->AMG_C);
45      MEMFREE(in->r);      MEMFREE(in->r);
46      MEMFREE(in->x_C);      MEMFREE(in->x_C);
47      MEMFREE(in->b_C);      MEMFREE(in->b_C);
48    
       
49      MEMFREE(in);      MEMFREE(in);
50       }       }
51  }  }
52    
53    index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54       if (in->AMG_C == NULL) {
55          return in->level;
56       } else {
57          return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58       }
59    }
60    double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61          if (in->AMG_C == NULL) {
62         if (in->A_C == NULL) {
63            return 1.;
64         } else {
65            return Paso_SystemMatrix_getSparsity(in->A_C);
66         }
67          } else {
68            return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69          }
70    }
71    dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72       if (in->AMG_C == NULL) {
73          if (in->A_C == NULL) {
74         return 0;
75          } else {
76         Paso_SystemMatrix_getTotalNumRows(in->A_C);
77          }
78       } else {
79         return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80       }
81    }
82  /*****************************************************************  /*****************************************************************
83    
84     constructs AMG     constructs AMG
85        
86  ******************************************************************/  ******************************************************************/
87  Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88    
89    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
90    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
91        
92    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;    Paso_SystemMatrix *Atemp=NULL, *A_C=NULL;
93    const dim_t n=A_p->numRows;    const dim_t n=A_p->numRows;
94    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
95    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree_S=NULL;
96    dim_t n_F=0, n_C=0, i;    dim_t n_F=0, n_C=0, i;
97    double time0=0;    double time0=0;
98    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
99    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
100        const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
101      const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
102    
103        
104    /*    /*
105        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening
106                
107    */    */
108    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (sparsity >= options->min_coarse_sparsity) ||
109       if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",         (total_n <= options->min_coarse_matrix_size) ||
110      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );           (level > options->level_max) ) {
111    
112        if (verbose) {
113              /*
114                  print stopping condition:
115                          - 'SPAR' = min_coarse_matrix_sparsity exceeded
116                          - 'SIZE' = min_coarse_matrix_size exceeded
117                          - 'LEVEL' = level_max exceeded
118              */
119              printf("Paso_Preconditioner: AMG: termination of coarsening by ");
120    
121              if (sparsity >= options->min_coarse_sparsity)
122                  printf("SPAR ");
123    
124              if (total_n <= options->min_coarse_matrix_size)
125                  printf("SIZE ");
126    
127              if (level > options->level_max)
128                  printf("LEVEL ");
129    
130              printf("\n");
131    
132            printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
133               level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);  
134    
135        }
136    
137       return NULL;       return NULL;
138    }    }
139       /* Start Coarsening : */       /* Start Coarsening : */
140    
141       split_marker=TMPMEMALLOC(n,index_t);       F_marker=TMPMEMALLOC(n,index_t);
142       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
143       degree=TMPMEMALLOC(n, dim_t);       degree_S=TMPMEMALLOC(n, dim_t);
144       S=TMPMEMALLOC(A_p->pattern->len, index_t);       S=TMPMEMALLOC(A_p->pattern->len, index_t);
145       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(S) ) ) {
146       /*       /*
147            set splitting of unknows:            set splitting of unknows:
148                
149           */           */
150       time0=Esys_timer();       time0=Esys_timer();
151       if (n_block>1) {       if (n_block>1) {
152             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau);
153       } else {       } else {
154             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, S, theta,tau);
155       }       }
156       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker);  
157    /*MPI:
158         Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
159    */
160        
161             /* in BoomerAMG interpolation is used FF connectiovity is required :*/
162    /*MPI:
163             if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
164                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
165    */
166    
167       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
168            
169       if (Esys_noError() ) {       if (Esys_noError() ) {
170          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
171          for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
172            
173          /*          /*
174             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
175          */          */
176          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
177          n_C=n-n_F;          n_C=n-n_F;
178          if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
179            
180          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
181             out = NULL;             out = NULL;
182          } else {          } else {
183             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
184             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
185            out->level = level;            out->level = level;
186            out->n = n;            out->n = n;
# Line 137  Paso_Preconditioner_LocalAMG* Paso_Preco Line 203  Paso_Preconditioner_LocalAMG* Paso_Preco
203             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
204             if ( Esys_noError() ) {             if ( Esys_noError() ) {
205    
206            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
207                
208            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if (n_C != 0) {
209                   /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
210        
211              /* allocate helpers :*/              /* allocate helpers :*/
212              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*n_C,double);
# Line 156  Paso_Preconditioner_LocalAMG* Paso_Preco Line 223  Paso_Preconditioner_LocalAMG* Paso_Preco
223                 {                 {
224                    #pragma omp for schedule(static)                    #pragma omp for schedule(static)
225                    for (i = 0; i < n; ++i) {                    for (i = 0; i < n; ++i) {
226                   if  (split_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
227                    }                    }
228                 }                 }
229                 /*  create mask of C nodes with value >-1 gives new id */                 /*  create mask of C nodes with value >-1 gives new id */
230                 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);                 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
231    
232                 #pragma omp parallel for private(i) schedule(static)                 #pragma omp parallel for private(i) schedule(static)
233                 for (i = 0; i < n; ++i) {                 for (i = 0; i < n; ++i) {
234                    if  (split_marker[i]) {                    if  (F_marker[i]) {
235                   mask_C[i]=-1;                   mask_C[i]=-1;
236                    } else {                    } else {
237                   mask_C[i]=counter[i];;                   mask_C[i]=counter[i];;
238                    }                    }
239                 }                 }
240                 /*                 /*
241                    get Restriction :                      get Prolongation :    
242                 */                                   */                  
243                 time0=Esys_timer();                 time0=Esys_timer();
244                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);  /*MPI:
245                   out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
246    */
247                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
248              }              }
249              /*                    /*      
250                 construct Prolongation operator as transposed of restriction operator:                 construct Restriction operator as transposed of Prolongation operator:
251              */              */
252              if ( Esys_noError()) {              if ( Esys_noError()) {
253                 time0=Esys_timer();                 time0=Esys_timer();
254                 out->R=Paso_SparseMatrix_getTranspose(out->P);  /*MPI:
255                 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 out->R=Paso_SystemMatrix_getTranspose(out->P);
256    */
257                   if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
258              }                    }      
259              /*              /*
260              construct coarse level matrix:              construct coarse level matrix:
261              */              */
262              if ( Esys_noError()) {              if ( Esys_noError()) {
263                 time0=Esys_timer();                 time0=Esys_timer();
264                 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  /*MPI:
265                 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
266                 Paso_SparseMatrix_free(Atemp);                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
267                  
268                   Paso_SystemMatrix_free(Atemp);
269    */
270    
271                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                             if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);            
272              }              }
273    
# Line 202  Paso_Preconditioner_LocalAMG* Paso_Preco Line 277  Paso_Preconditioner_LocalAMG* Paso_Preco
277                                
278              */              */
279              if ( Esys_noError()) {              if ( Esys_noError()) {
280                 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
281              }              }
282              if ( Esys_noError()) {              if ( Esys_noError()) {
283                 if ( out->AMG_C == NULL ) {                 if ( out->AMG_C == NULL ) {
# Line 210  Paso_Preconditioner_LocalAMG* Paso_Preco Line 285  Paso_Preconditioner_LocalAMG* Paso_Preco
285                    out->refinements = options->coarse_matrix_refinements;                    out->refinements = options->coarse_matrix_refinements;
286                    /* no coarse level matrix has been constructed. use direct solver */                    /* no coarse level matrix has been constructed. use direct solver */
287                    #ifdef MKL                    #ifdef MKL
288                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
289                      Paso_SparseMatrix_free(A_C);                      Paso_SystemMatrix_free(A_C);
290                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
291                      if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
292                    #else                    #else
293                      #ifdef UMFPACK                      #ifdef UMFPACK
294                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);                         out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
295                         Paso_SparseMatrix_free(A_C);                         Paso_SystemMatrix_free(A_C);
296                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
297                         if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
298                      #else                      #else
299                         out->A_C=A_C;                         out->A_C=A_C;
300                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
301                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
302                         if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
303                      #endif                      #endif
304                    #endif                    #endif
305                 } else {                 } else {
# Line 240  Paso_Preconditioner_LocalAMG* Paso_Preco Line 315  Paso_Preconditioner_LocalAMG* Paso_Preco
315       }       }
316    }    }
317    TMPMEMFREE(counter);    TMPMEMFREE(counter);
318    TMPMEMFREE(split_marker);    TMPMEMFREE(F_marker);
319    TMPMEMFREE(degree);    TMPMEMFREE(degree_S);
320    TMPMEMFREE(S);    TMPMEMFREE(S);
321    
322    if (Esys_noError()) {    if (Esys_noError()) {
323       return out;       return out;
324    } else  {    } else  {
325       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_AMG_free(out);
326       return NULL;       return NULL;
327    }    }
328  }  }
329    
330    
331  void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
332       const dim_t n = amg->n * amg->n_block;       const dim_t n = amg->n * amg->n_block;
333       double time0=0;       double time0=0;
334       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
# Line 261  void Paso_Preconditioner_LocalAMG_solve( Line 336  void Paso_Preconditioner_LocalAMG_solve(
336    
337       /* presmoothing */       /* presmoothing */
338       time0=Esys_timer();       time0=Esys_timer();
339       Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
340       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
341       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
342       /* end of presmoothing */       /* end of presmoothing */
# Line 269  void Paso_Preconditioner_LocalAMG_solve( Line 344  void Paso_Preconditioner_LocalAMG_solve(
344       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       if (amg->n_F < amg->n) { /* is there work on the coarse level? */
345           time0=Esys_timer();           time0=Esys_timer();
346       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
347       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
348       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
349           time0=Esys_timer()-time0;           time0=Esys_timer()-time0;
350       /* coarse level solve */       /* coarse level solve */
351       if ( amg->AMG_C == NULL) {       if ( amg->AMG_C == NULL) {
# Line 284  void Paso_Preconditioner_LocalAMG_solve( Line 359  void Paso_Preconditioner_LocalAMG_solve(
359            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
360            break;            break;
361             case (PASO_SMOOTHER):             case (PASO_SMOOTHER):
362            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
363            break;            break;
364          }          }
365          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
366       } else {       } else {
367          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
368       }         }  
369       time0=time0+Esys_timer();       time0=time0+Esys_timer();
370       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
371            
372           /*postsmoothing*/           /*postsmoothing*/
373                
374          /*solve Ax=b with initial guess x */          /*solve Ax=b with initial guess x */
375          time0=Esys_timer();          time0=Esys_timer();
376          Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);          Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
377          time0=Esys_timer()-time0;          time0=Esys_timer()-time0;
378          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
379          /*end of postsmoothing*/          /*end of postsmoothing*/
# Line 315  void Paso_Preconditioner_LocalAMG_solve( Line 390  void Paso_Preconditioner_LocalAMG_solve(
390  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
391  */  */
392    
393  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
394                        dim_t *degree, index_t *S,                        dim_t *degree_S, index_t *S,
395                        const double theta, const double tau)                        const double theta, const double tau)
396  {  {
397     const dim_t n=A->numRows;     const dim_t n=A->numRows;
# Line 326  void Paso_Preconditioner_AMG_setStrongCo Line 401  void Paso_Preconditioner_AMG_setStrongCo
401    
402    
403        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
404        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {        
             
405       max_offdiagonal = 0.;       max_offdiagonal = 0.;
406       sum_row=0;       sum_row=0;
407       main_row=0;       main_row=0;
# Line 345  void Paso_Preconditioner_AMG_setStrongCo Line 419  void Paso_Preconditioner_AMG_setStrongCo
419       }       }
420       threshold = theta*max_offdiagonal;       threshold = theta*max_offdiagonal;
421       kdeg=0;       kdeg=0;
422        
423       if (tau*main_row < sum_row) { /* no diagonal domainance */       if (tau*main_row < sum_row) { /* no diagonal domainance */
424          #pragma ivdep          #pragma ivdep
425          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
# Line 354  void Paso_Preconditioner_AMG_setStrongCo Line 429  void Paso_Preconditioner_AMG_setStrongCo
429            kdeg++;            kdeg++;
430             }             }
431          }          }
432       }           }
433       degree[i]=kdeg;       degree_S[i]=kdeg;
434        }        }
435    
436  }  }
# Line 366  void Paso_Preconditioner_AMG_setStrongCo Line 441  void Paso_Preconditioner_AMG_setStrongCo
441    
442  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
443  */  */
444  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
445                              dim_t *degree, index_t *S,                              dim_t *degree_S, index_t *S,
446                              const double theta, const double tau)                              const double theta, const double tau)
447    
448  {  {
# Line 393  void Paso_Preconditioner_AMG_setStrongCo Line 468  void Paso_Preconditioner_AMG_setStrongCo
468          max_offdiagonal = 0.;          max_offdiagonal = 0.;
469          sum_row=0;          sum_row=0;
470          main_row=0;          main_row=0;
471    
472          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
473             j=A->pattern->index[iptr];             j=A->pattern->index[iptr];
474             fnorm=0;             fnorm=0;
# Line 420  void Paso_Preconditioner_AMG_setStrongCo Line 496  void Paso_Preconditioner_AMG_setStrongCo
496            }            }
497             }             }
498          }          }
499          degree[i]=kdeg;          degree_S[i]=kdeg;
500       }             }      
501       TMPMEMFREE(rtmp);       TMPMEMFREE(rtmp);
502        } /* end of parallel region */        } /* end of parallel region */
# Line 428  void Paso_Preconditioner_AMG_setStrongCo Line 504  void Paso_Preconditioner_AMG_setStrongCo
504  }    }  
505    
506  /* the runge stueben coarsening algorithm: */  /* the runge stueben coarsening algorithm: */
507  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
508                           const dim_t* degree, const index_t* S,                          const dim_t* degree_S, const index_t* S,
509                           index_t*split_marker)                          index_t*split_marker, const bool_t usePanel)
510  {  {
    const bool_t usePanel=FALSE;  
511        
512     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
513     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;     dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
514     register index_t j, itmp;     register index_t j, itmp;
515        
516     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
517        
518     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
519     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);     degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
520     ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);     ST=TMPMEMALLOC(offset_S[n], index_t);  Esys_checkPtr(ST);
521     if (usePanel) {     if (usePanel) {
522        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
523        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
# Line 455  void Paso_Preconditioner_AMG_RungeStuebe Line 530  void Paso_Preconditioner_AMG_RungeStuebe
530        /* those unknows which are not influenced go into F, the rest is available for F or C */        /* those unknows which are not influenced go into F, the rest is available for F or C */
531        #pragma omp parallel for private(i) schedule(static)        #pragma omp parallel for private(i) schedule(static)
532        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
533       degreeT[i]=0;       degree_ST[i]=0;
534       if (degree[i]>0) {       if (degree_S[i]>0) {
535          lambda[i]=0;          lambda[i]=0;
536          split_marker[i]=PASO_AMG_UNDECIDED;          split_marker[i]=PASO_AMG_UNDECIDED;
537       } else {       } else {
# Line 466  void Paso_Preconditioner_AMG_RungeStuebe Line 541  void Paso_Preconditioner_AMG_RungeStuebe
541        }        }
542        /* create transpose :*/        /* create transpose :*/
543        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
544          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
545             j=S[offset[i]+p];             j=S[offset_S[i]+p];
546             ST[offset[j]+degreeT[j]]=i;             ST[offset_S[j]+degree_ST[j]]=i;
547             degreeT[j]++;             degree_ST[j]++;
548          }          }
549        }        }
550        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
# Line 477  void Paso_Preconditioner_AMG_RungeStuebe Line 552  void Paso_Preconditioner_AMG_RungeStuebe
552        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
553       if (split_marker[i]==PASO_AMG_UNDECIDED) {       if (split_marker[i]==PASO_AMG_UNDECIDED) {
554          itmp=lambda[i];          itmp=lambda[i];
555          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
556             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
557             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
558            itmp++;            itmp++;
559             } else {  /* at this point there are no C points */             } else {  /* at this point there are no C points */
# Line 504  void Paso_Preconditioner_AMG_RungeStuebe Line 579  void Paso_Preconditioner_AMG_RungeStuebe
579             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
580                        
581             /* all undecided unknown strongly coupled to i are moved to F */             /* all undecided unknown strongly coupled to i are moved to F */
582             for (p=0; p<degreeT[i]; ++p) {             for (p=0; p<degree_ST[i]; ++p) {
583            j=ST[offset[i]+p];            j=ST[offset_S[i]+p];
584                        
585            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
586                            
587               split_marker[j]=PASO_AMG_IN_F;               split_marker[j]=PASO_AMG_IN_F;
588               lambda[j]=-1;               lambda[j]=-1;
589                            
590               for (q=0; q<degreeT[j]; ++q) {               for (q=0; q<degree_ST[j]; ++q) {
591              k=ST[offset[j]+q];              k=ST[offset_S[j]+q];
592              if (split_marker[k]==PASO_AMG_UNDECIDED) {              if (split_marker[k]==PASO_AMG_UNDECIDED) {
593                 lambda[k]++;                 lambda[k]++;
594                 if (notInPanel[k]) {                 if (notInPanel[k]) {
# Line 529  void Paso_Preconditioner_AMG_RungeStuebe Line 604  void Paso_Preconditioner_AMG_RungeStuebe
604                            
605            }            }
606             }             }
607             for (p=0; p<degree[i]; ++p) {             for (p=0; p<degree_S[i]; ++p) {
608            j=S[offset[i]+p];            j=S[offset_S[i]+p];
609            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
610               lambda[j]--;               lambda[j]--;
611               if (notInPanel[j]) {               if (notInPanel[j]) {
# Line 569  void Paso_Preconditioner_AMG_RungeStuebe Line 644  void Paso_Preconditioner_AMG_RungeStuebe
644          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
645                    
646          /* all undecided unknown strongly coupled to i are moved to F */          /* all undecided unknown strongly coupled to i are moved to F */
647          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
648             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
649             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
650                    
651            split_marker[j]=PASO_AMG_IN_F;            split_marker[j]=PASO_AMG_IN_F;
652            lambda[j]=-1;            lambda[j]=-1;
653                    
654            for (q=0; q<degreeT[j]; ++q) {            for (q=0; q<degree_ST[j]; ++q) {
655               k=ST[offset[j]+q];               k=ST[offset_S[j]+q];
656               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
657            }            }
658    
659             }             }
660          }          }
661          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
662             j=S[offset[i]+p];             j=S[offset_S[i]+p];
663             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
664          }          }
665                    
666       }       }
667       i=Paso_Util_arg_max(n,lambda);       i=Paso_Util_arg_max(n,lambda);
668        }        }
669              
670     }     }
671     TMPMEMFREE(lambda);     TMPMEMFREE(lambda);
672     TMPMEMFREE(ST);     TMPMEMFREE(ST);
673     TMPMEMFREE(degreeT);     TMPMEMFREE(degree_ST);
674     TMPMEMFREE(panel);     TMPMEMFREE(panel);
675     TMPMEMFREE(notInPanel);     TMPMEMFREE(notInPanel);
676  }  }
677    /* ensures that two F nodes are connected via a C node :*/
678    void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
679                            const dim_t* degree_S, const index_t* S,
680                            index_t*split_marker)
681    {
682          dim_t i, p, q;
683    
684          /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
685          for (i=0;i<n;++i) {
686                if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
687               for (p=0; p<degree_S[i]; ++p) {
688                      register index_t j=S[offset_S[i]+p];
689                      if ( (split_marker[j]==PASO_AMG_IN_F)  && (degree_S[j]>0) )  {
690                          /* i and j are now two F nodes which are strongly connected */
691                          /* is there a C node they share ? */
692                          register index_t sharing=-1;
693                      for (q=0; q<degree_S[i]; ++q) {
694                             index_t k=S[offset_S[i]+q];
695                             if (split_marker[k]==PASO_AMG_IN_C) {
696                                register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
697                                if (where_k != NULL) {
698                                   sharing=k;
699                                   break;
700                                }
701                             }
702                          }
703                          if (sharing<0) {
704                               if (i<j) {
705                                  split_marker[j]=PASO_AMG_IN_C;
706                               } else {
707                                  split_marker[i]=PASO_AMG_IN_C;
708                                  break;  /* no point to look any further as i is now a C node */
709                               }
710                          }
711                      }
712                   }
713                }
714           }
715    }
716    
717    void Paso_Preconditioner_AMG_CIJPCoarsening( )
718    {
719      const dim_t my_n;
720      const dim_t overlap_n;
721      const dim_t n= my_n + overlap_n;
722       /* set local lambda + overlap */
723       #pragma omp parallel for private(i)
724       for (i=0; i<n ++i) {
725           w[i]=degree_ST[i];
726       }
727       for (i=0; i<my_n; i++) {
728          w2[i]=random;
729       }
730    
731    
732    Paso_Coupler_add(w, 1., w2,
733       /* adjust w accross processors */
734      
735       /*  */
736       global_n_C=0;
737       global_n_F=..;
738      
739       while (global_n_C + global_n_F < global_n) {
740    
741          
742          is_in_D[i]=FALSE;
743          /*  test  local connectivit*/
744          for i ..
745             for k in S_i:
746                 w2[i]=MAX(w2[i],w[k]);
747                 w2[k]=MAX(w2[k],w[i]);
748          /* adjust overlaps by MAX */
749          ...
750          /* points with w[i]>w2[i] become C nodes */
751          
752              
753          
754          
755          
756    
757    
758          global_n_C=?
759          global_n_F=?
760    
761       }
762      
763    }

Legend:
Removed from v.3351  
changed lines
  Added in v.3442

  ViewVC Help
Powered by ViewVC 1.1.26