/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3283 by gross, Mon Oct 18 22:39:28 2010 UTC revision 3352 by gross, Tue Nov 16 03:58:09 2010 UTC
# Line 18  Line 18 
18    
19  /**************************************************************/  /**************************************************************/
20    
21  /* Author: artak@uq.edu.au                                */  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                                */
22    
23  /**************************************************************/  /**************************************************************/
24    
# Line 61  Paso_Preconditioner_LocalAMG* Paso_Preco Line 61  Paso_Preconditioner_LocalAMG* Paso_Preco
61    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_LocalAMG* out=NULL;
62    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
63        
64    Paso_SparseMatrix* W_FC=NULL, *Atemp=NULL, *A_C=NULL;    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;
65    const dim_t n=A_p->numRows;    const dim_t n=A_p->numRows;
66    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
67    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL;    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;
68    dim_t n_F=0, n_C=0, i;    dim_t n_F=0, n_C=0, i;
69    double time0=0;    double time0=0;
70    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
# Line 76  Paso_Preconditioner_LocalAMG* Paso_Preco Line 76  Paso_Preconditioner_LocalAMG* Paso_Preco
76                
77    */    */
78    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
79       if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",       if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
80      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );        level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );  
81       return NULL;       return NULL;
82    }    }
# Line 84  Paso_Preconditioner_LocalAMG* Paso_Preco Line 84  Paso_Preconditioner_LocalAMG* Paso_Preco
84    
85       split_marker=TMPMEMALLOC(n,index_t);       split_marker=TMPMEMALLOC(n,index_t);
86       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
87       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) ) ) {       degree=TMPMEMALLOC(n, dim_t);
88         S=TMPMEMALLOC(A_p->pattern->len, index_t);
89         if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
90       /*       /*
91            set splitting of unknows:            set splitting of unknows:
92                
93           */           */
94       time0=Esys_timer();       time0=Esys_timer();
95       if (n_block>1) {       if (n_block>1) {
96          Paso_Preconditioner_AMG_RSCoarsening_Block(A_p, split_marker, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);
97       } else {       } else {
98          Paso_Preconditioner_AMG_RSCoarsening(A_p, split_marker, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);
99       }       }
100         Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);
101       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
102            
103       if (Esys_noError() ) {       if (Esys_noError() ) {
# Line 106  Paso_Preconditioner_LocalAMG* Paso_Preco Line 109  Paso_Preconditioner_LocalAMG* Paso_Preco
109          */          */
110          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
111          n_C=n-n_F;          n_C=n-n_F;
112          if (verbose) printf("Paso AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
113            
114          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
115             out = NULL;             out = NULL;
116          } else {          } else {
117             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
118             mask_C=TMPMEMALLOC(n,index_t);             if (! Esys_checkPtr(out)) {
            rows_in_F=TMPMEMALLOC(n_F,index_t);  
            if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) {  
119            out->level = level;            out->level = level;
120            out->n = n;            out->n = n;
121            out->n_F = n_F;            out->n_F = n_F;
# Line 128  Paso_Preconditioner_LocalAMG* Paso_Preco Line 129  Paso_Preconditioner_LocalAMG* Paso_Preco
129            out->x_C = NULL;            out->x_C = NULL;
130            out->b_C = NULL;            out->b_C = NULL;
131            out->AMG_C = NULL;            out->AMG_C = NULL;
132              out->Smoother=NULL;
133               }
134               mask_C=TMPMEMALLOC(n,index_t);
135               rows_in_F=TMPMEMALLOC(n_F,index_t);
136               Esys_checkPtr(mask_C);
137               Esys_checkPtr(rows_in_F);
138               if ( Esys_noError() ) {
139    
140            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
141                    
142            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
143        
              /* creates index for F:*/  
             #pragma omp parallel for private(i) schedule(static)  
             for (i = 0; i < n; ++i) {  
                if  (split_marker[i]) rows_in_F[counter[i]]=i;  
             }            
             /*  create mask of C nodes with value >-1 gives new id */  
             i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);  
   
             #pragma omp parallel for private(i) schedule(static)  
             for (i = 0; i < n; ++i) {  
                if  (split_marker[i]) {  
                   mask_C[i]=-1;  
                } else {  
                   mask_C[i]=counter[i];;  
                }  
             }  
             /*  
                   get Restriction : (can we do this in one go?)    
             */  
             time0=Esys_timer();  
             W_FC=Paso_SparseMatrix_getSubmatrix(A_p, n_F, n_C, rows_in_F, mask_C);  
             if (SHOW_TIMING) printf("timing: level %d: get Weights: %e\n",level, Esys_timer()-time0);  
               
             time0=Esys_timer();  
             Paso_SparseMatrix_updateWeights(A_p,W_FC,split_marker);  
             if (SHOW_TIMING) printf("timing: level %d: updateWeights: %e\n",level, Esys_timer()-time0);  
               
             time0=Esys_timer();  
             out->P=Paso_SparseMatrix_getProlongation(W_FC,split_marker);  
             if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);  
               
             Paso_SparseMatrix_free(W_FC);  
             /*        
                construct Prolongation operator as transposed of restriction operator:  
             */  
             time0=Esys_timer();  
             out->R=Paso_SparseMatrix_getTranspose(out->P);  
             if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);  
             /*  
             constrPaso AMG level 2: 4 unknowns are flagged for elimination.  
             timing: Gauss-Seidel preparation: elemination : 8.096400e-05  
             AMG: level 2: 4 unknowns eliminated.  
             uct coarse level matrix (can we do this in one call?)  
             */  
             time0=Esys_timer();  
             Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  
             A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);  
             Paso_SparseMatrix_free(Atemp);  
             /*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/  
             if (SHOW_TIMING) printf("timing: level %d : getCoarseMatrix: %e\n",level,Esys_timer()-time0);  
               
144              /* allocate helpers :*/              /* allocate helpers :*/
145              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*n_C,double);
146              out->b_C=MEMALLOC(n_block*n_C,double);              out->b_C=MEMALLOC(n_block*n_C,double);
147              out->r=MEMALLOC(n_block*n,double);              out->r=MEMALLOC(n_block*n,double);
148                            
149              Esys_checkPtr(out->r);              Esys_checkPtr(out->r);
             Esys_checkPtr(out->Smoother);  
150              Esys_checkPtr(out->x_C);              Esys_checkPtr(out->x_C);
151              Esys_checkPtr(out->b_C);              Esys_checkPtr(out->b_C);
152                
153                if ( Esys_noError() ) {
154                   /* creates index for F:*/
155                   #pragma omp parallel private(i)
156                   {
157                      #pragma omp for schedule(static)
158                      for (i = 0; i < n; ++i) {
159                     if  (split_marker[i]) rows_in_F[counter[i]]=i;
160                      }
161                   }
162                   /*  create mask of C nodes with value >-1 gives new id */
163                   i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
164    
165                   #pragma omp parallel for private(i) schedule(static)
166                   for (i = 0; i < n; ++i) {
167                      if  (split_marker[i]) {
168                     mask_C[i]=-1;
169                      } else {
170                     mask_C[i]=counter[i];;
171                      }
172                   }
173                   /*
174                      get Restriction :  
175                   */                  
176                   time0=Esys_timer();
177                   out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);
178                   if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
179                }
180                /*      
181                   construct Prolongation operator as transposed of restriction operator:
182                */
183                if ( Esys_noError()) {
184                   time0=Esys_timer();
185                   out->R=Paso_SparseMatrix_getTranspose(out->P);
186                   if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
187                }      
188                /*
189                construct coarse level matrix:
190                */
191                if ( Esys_noError()) {
192                   time0=Esys_timer();
193                   Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
194                   A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
195                   Paso_SparseMatrix_free(Atemp);
196                   if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);            
197                }
198    
199                            
200              /*              /*
201                 constructe courser level:                 constructe courser level:
202                                
203              */              */
204              out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);              if ( Esys_noError()) {
205                               out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
206                }
207              if ( Esys_noError()) {              if ( Esys_noError()) {
208                 if ( out->AMG_C == NULL ) {                 if ( out->AMG_C == NULL ) {
209                    out->reordering = options->reordering;                    out->reordering = options->reordering;
210                    out->refinements = options->coarse_matrix_refinements;                    out->refinements = options->coarse_matrix_refinements;
211                    /* no coarse level matrix has been constructed. use direct solver */                    /* no coarse level matrix has been constructed. use direct solver */
212                    #ifdef MKL                    #ifdef MKL
213                      Atemp=Paso_SparseMatrix_unroll(A_C);                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
214                      Paso_SparseMatrix_free(A_C);                      Paso_SparseMatrix_free(A_C);
                     out->A_C=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, Atemp->pattern,1,1, FALSE);  
                     #pragma omp parallel for private(i) schedule(static)  
                     for (i=0;i<out->A->len;++i) {  
                        out->A_C->val[i]=Atemp->val[i];  
                     }  
                     Paso_SparseMatrix_free(Atemp);  
215                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
216                        if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
217                    #else                    #else
218                      #ifdef UMFPACK                      #ifdef UMFPACK
219                         out->A_C=Paso_SparseMatrix_unroll(A_C);                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
220                         Paso_SparseMatrix_free(A_C);                         Paso_SparseMatrix_free(A_C);
221                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
222                           if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
223                      #else                      #else
224                         out->A_C=A_C;                         out->A_C=A_C;
225                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
226                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
227                           if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);
228                      #endif                      #endif
229                    #endif                    #endif
230                 } else {                 } else {
# Line 239  Paso_Preconditioner_LocalAMG* Paso_Preco Line 241  Paso_Preconditioner_LocalAMG* Paso_Preco
241    }    }
242    TMPMEMFREE(counter);    TMPMEMFREE(counter);
243    TMPMEMFREE(split_marker);    TMPMEMFREE(split_marker);
244      TMPMEMFREE(degree);
245      TMPMEMFREE(S);
246    
247    if (Esys_noError()) {    if (Esys_noError()) {
       if (verbose) printf("AMG: level %d: %d unknowns eliminated.\n",level, n_F);  
248       return out;       return out;
249    } else  {    } else  {
250       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_LocalAMG_free(out);
# Line 267  void Paso_Preconditioner_LocalAMG_solve( Line 270  void Paso_Preconditioner_LocalAMG_solve(
270           time0=Esys_timer();           time0=Esys_timer();
271       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
272       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
273           Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
274           time0=Esys_timer()-time0;           time0=Esys_timer()-time0;
275       /* coarse level solve */       /* coarse level solve */
276       if ( amg->AMG_C == NULL) {       if ( amg->AMG_C == NULL) {
# Line 289  void Paso_Preconditioner_LocalAMG_solve( Line 292  void Paso_Preconditioner_LocalAMG_solve(
292          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
293       }         }  
294       time0=time0+Esys_timer();       time0=time0+Esys_timer();
295       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x);       /* x = x + P*x_c */       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
296            
297           /*postsmoothing*/           /*postsmoothing*/
298                
# Line 307  void Paso_Preconditioner_LocalAMG_solve( Line 310  void Paso_Preconditioner_LocalAMG_solve(
310  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
311  /* tau = threshold for diagonal dominance */  /* tau = threshold for diagonal dominance */
312    
313  void Paso_Preconditioner_AMG_RSCoarsening(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)  /*S_i={j \in N_i; i strongly coupled to j}
314    
315    in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
316    */
317    
318    void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,
319                          dim_t *degree, index_t *S,
320                          const double theta, const double tau)
321  {  {
322     const dim_t n=A->numRows;     const dim_t n=A->numRows;
323     dim_t *degree=NULL;   /* number of naighbours in strong connection graph */     index_t iptr, i,j;
    index_t *S=NULL, iptr, i,j;  
324     dim_t kdeg;     dim_t kdeg;
325     double max_offdiagonal, threshold, sum_row, main_row, fnorm;     double max_offdiagonal, threshold, sum_row, main_row, fnorm;
     
    degree=TMPMEMALLOC(n, dim_t);  
    S=TMPMEMALLOC(A->pattern->len, index_t);  
326    
327     if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) )  {  
       /*S_i={j \in N_i; i strongly coupled to j}  
     
      in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  
       */  
328        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
329        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
330                        
# Line 355  void Paso_Preconditioner_AMG_RSCoarsenin Line 357  void Paso_Preconditioner_AMG_RSCoarsenin
357       }       }
358       degree[i]=kdeg;       degree[i]=kdeg;
359        }        }
360          
       Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);  
         
    }  
    TMPMEMFREE(degree);  
    TMPMEMFREE(S);  
361  }  }
362    
363  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
364  /* tau = threshold for diagonal dominance */  /* tau = threshold for diagonal dominance */
365  void Paso_Preconditioner_AMG_RSCoarsening_Block(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)  /*S_i={j \in N_i; i strongly coupled to j}
366    
367    in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
368    */
369    void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,
370                                dim_t *degree, index_t *S,
371                                const double theta, const double tau)
372    
373  {  {
374     const dim_t n_block=A->row_block_size;     const dim_t n_block=A->row_block_size;
375     const dim_t n=A->numRows;     const dim_t n=A->numRows;
376     dim_t *degree=NULL;   /* number of naighbours in strong connection graph */     index_t iptr, i,j, bi;
    index_t *S=NULL, iptr, i,j, bi;  
377     dim_t kdeg, max_deg;     dim_t kdeg, max_deg;
378     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
379     double *rtmp;     double *rtmp;
380        
381        
    degree=TMPMEMALLOC(n, dim_t);  
    S=TMPMEMALLOC(A->pattern->len, index_t);  
     
    if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S)  ) ) {  
       /*S_i={j \in N_i; i strongly coupled to j}  
     
       in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  
       */  
382        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
383        {        {
384       max_deg=0;       max_deg=0;
# Line 405  void Paso_Preconditioner_AMG_RSCoarsenin Line 399  void Paso_Preconditioner_AMG_RSCoarsenin
399             #pragma ivdep             #pragma ivdep
400             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
401             fnorm=sqrt(fnorm);             fnorm=sqrt(fnorm);
402                         rtmp[iptr-A->pattern->ptr[i]]=fnorm;
403             if( j != i) {             if( j != i) {
           rtmp[iptr-A->pattern->ptr[i]]=fnorm;  
404            max_offdiagonal = MAX(max_offdiagonal,fnorm);            max_offdiagonal = MAX(max_offdiagonal,fnorm);
405            sum_row+=fnorm;            sum_row+=fnorm;
406             } else {             } else {
# Line 431  void Paso_Preconditioner_AMG_RSCoarsenin Line 424  void Paso_Preconditioner_AMG_RSCoarsenin
424       }             }      
425       TMPMEMFREE(rtmp);       TMPMEMFREE(rtmp);
426        } /* end of parallel region */        } /* end of parallel region */
427        Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);  
    }  
    TMPMEMFREE(degree);  
    TMPMEMFREE(S);    
428  }    }  
429    
430  /* the runge stueben coarsening algorithm: */  /* the runge stueben coarsening algorithm: */
431  void Paso_Preconditioner_AMG_RSCoarsening_search(const dim_t n, const index_t* offset, const dim_t* degree, const index_t* S,  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,
432                           index_t*split_marker)                          const dim_t* degree, const index_t* S,
433                            index_t*split_marker, const bool_t usePanel)
434  {  {
435     index_t *lambda=NULL, j, *ST=NULL;    
436     dim_t i,k, p, q, *degreeT=NULL, itmp;     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
437       dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;
438       register index_t j, itmp;
439        
440     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
441        
442     lambda=TMPMEMALLOC(n, index_t);     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
443     degreeT=TMPMEMALLOC(n, dim_t);     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);
444     ST=TMPMEMALLOC(offset[n], index_t);     ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);
445       if (usePanel) {
446          notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
447          panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
448       }
449      
450      
451        
452     if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {     if (Esys_noError() ) {
453        /* initialize  split_marker and split_marker :*/        /* initialize  split_marker and split_marker :*/
454        /* those unknows which are not influenced go into F, the rest is available for F or C */        /* those unknows which are not influenced go into F, the rest is available for F or C */
455        #pragma omp parallel for private(i) schedule(static)        #pragma omp parallel for private(i) schedule(static)
# Line 488  void Paso_Preconditioner_AMG_RSCoarsenin Line 487  void Paso_Preconditioner_AMG_RSCoarsenin
487          lambda[i]=itmp;          lambda[i]=itmp;
488       }       }
489        }        }
490          if (usePanel) {
491         #pragma omp parallel for private(i) schedule(static)
492         for (i=0;i<n;++i) notInPanel[i]=TRUE;
493          }
494        /* start search :*/        /* start search :*/
495        i=Paso_Util_arg_max(n,lambda);        i=Paso_Util_arg_max(n,lambda);
496        while (lambda[i]>-1) { /* is there any undecided unknowns? */        while (lambda[i]>-1) { /* is there any undecided unknown? */
497        
498       /* the unknown i is moved to C */       if (usePanel) {
499       split_marker[i]=PASO_AMG_IN_C;          len_panel=0;
500       lambda[i]=-1;  /* lambda fro unavailable unknowns is set to -1 */          do {
501                   /* the unknown i is moved to C */
502       /* all undecided unknown strongly coupled to i are moved to F */             split_marker[i]=PASO_AMG_IN_C;
503       for (p=0; p<degreeT[i]; ++p) {             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
504          j=ST[offset[i]+p];            
505                   /* all undecided unknown strongly coupled to i are moved to F */
506          if (split_marker[j]==PASO_AMG_UNDECIDED) {             for (p=0; p<degreeT[i]; ++p) {
507                    j=ST[offset[i]+p];
508             split_marker[j]=PASO_AMG_IN_F;            
509             lambda[j]=-1;            if (split_marker[j]==PASO_AMG_UNDECIDED) {
510                      
511             for (q=0; q<degreeT[j]; ++q) {               split_marker[j]=PASO_AMG_IN_F;
512            k=ST[offset[j]+q];               lambda[j]=-1;
513            if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;              
514                 for (q=0; q<degreeT[j]; ++q) {
515                k=ST[offset[j]+q];
516                if (split_marker[k]==PASO_AMG_UNDECIDED) {
517                   lambda[k]++;
518                   if (notInPanel[k]) {
519                      notInPanel[k]=FALSE;
520                      panel[len_panel]=k;
521                      len_panel++;
522                   }
523    
524                }    /* the unknown i is moved to C */
525                split_marker[i]=PASO_AMG_IN_C;
526                lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
527                 }
528                
529              }
530               }
531               for (p=0; p<degree[i]; ++p) {
532              j=S[offset[i]+p];
533              if (split_marker[j]==PASO_AMG_UNDECIDED) {
534                 lambda[j]--;
535                 if (notInPanel[j]) {
536                notInPanel[j]=FALSE;
537                panel[len_panel]=j;
538                len_panel++;
539                 }
540              }
541             }             }
542    
543               /* consolidate panel */
544               /* remove lambda[q]=-1 */
545               lambda_max=-1;
546               i=-1;
547               len_panel_new=0;
548               for (q=0; q<len_panel; q++) {
549                 k=panel[q];
550                 lambda_k=lambda[k];
551                 if (split_marker[k]==PASO_AMG_UNDECIDED) {
552                panel[len_panel_new]=k;
553                len_panel_new++;
554    
555                if (lambda_max == lambda_k) {
556                   if (k<i) i=k;
557                } else if (lambda_max < lambda_k) {
558                   lambda_max =lambda_k;
559                   i=k;
560                }
561                 }
562               }
563               len_panel=len_panel_new;
564            } while (len_panel>0);    
565         } else {
566            /* the unknown i is moved to C */
567            split_marker[i]=PASO_AMG_IN_C;
568            lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
569            
570            /* all undecided unknown strongly coupled to i are moved to F */
571            for (p=0; p<degreeT[i]; ++p) {
572               j=ST[offset[i]+p];
573               if (split_marker[j]==PASO_AMG_UNDECIDED) {
574            
575              split_marker[j]=PASO_AMG_IN_F;
576              lambda[j]=-1;
577            
578              for (q=0; q<degreeT[j]; ++q) {
579                 k=ST[offset[j]+q];
580                 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
581              }
582    
583               }
584          }          }
585            for (p=0; p<degree[i]; ++p) {
586               j=S[offset[i]+p];
587               if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
588            }
589            
590       }       }
      for (p=0; p<degree[i]; ++p) {  
         j=S[offset[i]+p];  
         if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;  
      }  
       
591       i=Paso_Util_arg_max(n,lambda);       i=Paso_Util_arg_max(n,lambda);
592        }        }
593     }     }
594     TMPMEMFREE(lambda);     TMPMEMFREE(lambda);
595     TMPMEMFREE(ST);     TMPMEMFREE(ST);
596     TMPMEMFREE(degreeT);     TMPMEMFREE(degreeT);
597       TMPMEMFREE(panel);
598       TMPMEMFREE(notInPanel);
599  }  }

Legend:
Removed from v.3283  
changed lines
  Added in v.3352

  ViewVC Help
Powered by ViewVC 1.1.26