/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3403 by gross, Tue Dec 7 08:13:51 2010 UTC revision 3458 by gross, Mon Jan 31 07:06:42 2011 UTC
# Line 14  Line 14 
14    
15  /**************************************************************/  /**************************************************************/
16    
17  /* Paso: AMG preconditioner                                  */  /* Paso: AMG preconditioner  (local version)                  */
18    
19  /**************************************************************/  /**************************************************************/
20    
# Line 30  Line 30 
30  #include "PasoUtil.h"  #include "PasoUtil.h"
31  #include "UMFPACK.h"  #include "UMFPACK.h"
32  #include "MKL.h"  #include "MKL.h"
33    #include<stdio.h>
34    
35    
36  /**************************************************************/  /**************************************************************/
37    
38  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
39    
40  void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {  void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41       if (in!=NULL) {       if (in!=NULL) {
42      Paso_Preconditioner_LocalSmoother_free(in->Smoother);      Paso_Preconditioner_Smoother_free(in->Smoother);
43      Paso_SparseMatrix_free(in->P);      Paso_SystemMatrix_free(in->P);
44      Paso_SparseMatrix_free(in->R);      Paso_SystemMatrix_free(in->R);
45      Paso_SparseMatrix_free(in->A_C);      Paso_SystemMatrix_free(in->A_C);
46      Paso_Preconditioner_LocalAMG_free(in->AMG_C);      Paso_Preconditioner_AMG_free(in->AMG_C);
47      MEMFREE(in->r);      MEMFREE(in->r);
48      MEMFREE(in->x_C);      MEMFREE(in->x_C);
49      MEMFREE(in->b_C);      MEMFREE(in->b_C);
50    
       
51      MEMFREE(in);      MEMFREE(in);
52       }       }
53  }  }
54    
55  index_t Paso_Preconditioner_LocalAMG_getMaxLevel(const Paso_Preconditioner_LocalAMG * in) {  index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56     if (in->AMG_C == NULL) {     if (in->AMG_C == NULL) {
57        return in->level;        return in->level;
58     } else {     } else {
59        return Paso_Preconditioner_LocalAMG_getMaxLevel(in->AMG_C);        return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60     }     }
61  }  }
62  double Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(const Paso_Preconditioner_LocalAMG * in) {  double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63        if (in->AMG_C == NULL) {        if (in->AMG_C == NULL) {
64       if (in->A_C == NULL) {       if (in->A_C == NULL) {
65          return 1.;          return 1.;
66       } else {       } else {
67          return DBLE(in->A_C->pattern->len)/DBLE(in->A_C->numRows)/DBLE(in->A_C->numRows);          return Paso_SystemMatrix_getSparsity(in->A_C);
68       }       }
69        } else {        } else {
70          return Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(in->AMG_C);          return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71        }        }
72  }  }
73  dim_t Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(const Paso_Preconditioner_LocalAMG * in) {  dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74     if (in->AMG_C == NULL) {     if (in->AMG_C == NULL) {
75        if (in->A_C == NULL) {        if (in->A_C == NULL) {
76       return 0;       return 0;
77        } else {        } else {
78       return in->A_C->numRows;       return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79        }        }
80     } else {     } else {
81       return Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(in->AMG_C);       return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82     }     }
83  }  }
84  /*****************************************************************  /*****************************************************************
# Line 85  dim_t Paso_Preconditioner_LocalAMG_getNu Line 86  dim_t Paso_Preconditioner_LocalAMG_getNu
86     constructs AMG     constructs AMG
87        
88  ******************************************************************/  ******************************************************************/
89  Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90    
91    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
92    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
93      
94    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;    const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
95    const dim_t n=A_p->numRows;    const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
96      const dim_t n = my_n + overlap_n;
97    
98    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
99    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;    index_t* F_marker=NULL, *counter=NULL;
100    dim_t n_F=0, n_C=0, i;    dim_t i;
101    double time0=0;    double time0=0;
102    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
103    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
104        const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
105      const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
106    
107        
108    /*    /*
109        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening
110                
111    */    */
112    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (sparsity >= options->min_coarse_sparsity) ||
113       if (verbose) printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",         (total_n <= options->min_coarse_matrix_size) ||
114      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );           (level > options->level_max) ) {
115       return NULL;  
116    }          if (verbose) {
117              /*
118                  print stopping condition:
119                          - 'SPAR' = min_coarse_matrix_sparsity exceeded
120                          - 'SIZE' = min_coarse_matrix_size exceeded
121                          - 'LEVEL' = level_max exceeded
122              */
123              printf("Paso_Preconditioner: AMG: termination of coarsening by ");
124    
125              if (sparsity >= options->min_coarse_sparsity)
126                  printf("SPAR ");
127    
128              if (total_n <= options->min_coarse_matrix_size)
129                  printf("SIZE ");
130    
131              if (level > options->level_max)
132                  printf("LEVEL ");
133    
134              printf("\n");
135    
136            printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
137               level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);  
138    
139           }
140    
141           return NULL;
142      }  else {
143       /* Start Coarsening : */       /* Start Coarsening : */
144         const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
145         dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
146         index_t *offset_S=TMPMEMALLOC(my_n, index_t);
147         index_t *S=TMPMEMALLOC(len_S, index_t);
148    
149       split_marker=TMPMEMALLOC(n,index_t);       F_marker=TMPMEMALLOC(n,index_t);
150       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
151       degree=TMPMEMALLOC(n, dim_t);  
152       S=TMPMEMALLOC(A_p->pattern->len, index_t);       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
153       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {      /*
154       /*             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
155        */
156        Paso_SystemMatrix_copyColCoupleBlock(A_p);
157    
158        /*
159            set splitting of unknows:            set splitting of unknows:
160                  
161           */           */
162       time0=Esys_timer();       time0=Esys_timer();
163       if (n_block>1) {       if (n_block>1) {
164             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
165       } else {       } else {
166             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
167       }       }
168       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);      Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
169       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);      return NULL;
170    {
171       dim_t p;
172       for (i=0; i< my_n; ++i) {
173             printf("%d: ",i);
174            for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
175            printf("\n");
176       }
177    }
178          
179    
180    /*MPI:
181         Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
182    */
183            
184             /* in BoomerAMG interpolation is used FF connectiovity is required :*/
185    /*MPI:
186             if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
187                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
188    */
189    
190         options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
191    
192    #ifdef AAAAA
193       if (Esys_noError() ) {       if (Esys_noError() ) {
194          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
195          for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
196            
197          /*          /*
198             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
199          */          */
200          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
201          n_C=n-n_F;          n_C=n-n_F;
202          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
203            
204          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
205             out = NULL;             out = NULL;
206          } else {          } else {
207             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
208             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
209            out->level = level;            out->level = level;
210            out->n = n;            out->n = n;
# Line 166  Paso_Preconditioner_LocalAMG* Paso_Preco Line 227  Paso_Preconditioner_LocalAMG* Paso_Preco
227             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
228             if ( Esys_noError() ) {             if ( Esys_noError() ) {
229    
230            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
231                
232            if (n_C != 0) {            if (n_C != 0) {
233                 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */                 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
# Line 186  Paso_Preconditioner_LocalAMG* Paso_Preco Line 247  Paso_Preconditioner_LocalAMG* Paso_Preco
247                 {                 {
248                    #pragma omp for schedule(static)                    #pragma omp for schedule(static)
249                    for (i = 0; i < n; ++i) {                    for (i = 0; i < n; ++i) {
250                   if  (split_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
251                    }                    }
252                 }                 }
253                 /*  create mask of C nodes with value >-1 gives new id */                 /*  create mask of C nodes with value >-1 gives new id */
254                 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);                 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
255    
256                 #pragma omp parallel for private(i) schedule(static)                 #pragma omp parallel for private(i) schedule(static)
257                 for (i = 0; i < n; ++i) {                 for (i = 0; i < n; ++i) {
258                    if  (split_marker[i]) {                    if  (F_marker[i]) {
259                   mask_C[i]=-1;                   mask_C[i]=-1;
260                    } else {                    } else {
261                   mask_C[i]=counter[i];;                   mask_C[i]=counter[i];;
262                    }                    }
263                 }                 }
264                 /*                 /*
265                    get Restriction :                      get Prolongation :    
266                 */                                   */                  
267                 time0=Esys_timer();                 time0=Esys_timer();
268                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);  /*MPI:
269                   out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
270    */
271                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
272              }              }
273              /*                    /*      
274                 construct Prolongation operator as transposed of restriction operator:                 construct Restriction operator as transposed of Prolongation operator:
275              */              */
276              if ( Esys_noError()) {              if ( Esys_noError()) {
277                 time0=Esys_timer();                 time0=Esys_timer();
278                 out->R=Paso_SparseMatrix_getTranspose(out->P);  /*MPI:
279                 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 out->R=Paso_SystemMatrix_getTranspose(out->P);
280    */
281                   if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
282              }                    }      
283              /*              /*
284              construct coarse level matrix:              construct coarse level matrix:
285              */              */
286              if ( Esys_noError()) {              if ( Esys_noError()) {
287                 time0=Esys_timer();                 time0=Esys_timer();
288                 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  /*MPI:
289                 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
290                 Paso_SparseMatrix_free(Atemp);                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
291                  
292                   Paso_SystemMatrix_free(Atemp);
293    */
294    
295                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                             if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);            
296              }              }
297    
# Line 232  Paso_Preconditioner_LocalAMG* Paso_Preco Line 301  Paso_Preconditioner_LocalAMG* Paso_Preco
301                                
302              */              */
303              if ( Esys_noError()) {              if ( Esys_noError()) {
304                 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
305              }              }
306              if ( Esys_noError()) {              if ( Esys_noError()) {
307                 if ( out->AMG_C == NULL ) {                 if ( out->AMG_C == NULL ) {
# Line 240  Paso_Preconditioner_LocalAMG* Paso_Preco Line 309  Paso_Preconditioner_LocalAMG* Paso_Preco
309                    out->refinements = options->coarse_matrix_refinements;                    out->refinements = options->coarse_matrix_refinements;
310                    /* no coarse level matrix has been constructed. use direct solver */                    /* no coarse level matrix has been constructed. use direct solver */
311                    #ifdef MKL                    #ifdef MKL
312                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
313                      Paso_SparseMatrix_free(A_C);                      Paso_SystemMatrix_free(A_C);
314                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
315                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
316                    #else                    #else
317                      #ifdef UMFPACK                      #ifdef UMFPACK
318                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);                         out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
319                         Paso_SparseMatrix_free(A_C);                         Paso_SystemMatrix_free(A_C);
320                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
321                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
322                      #else                      #else
323                         out->A_C=A_C;                         out->A_C=A_C;
324                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
325                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
326                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
327                      #endif                      #endif
# Line 268  Paso_Preconditioner_LocalAMG* Paso_Preco Line 337  Paso_Preconditioner_LocalAMG* Paso_Preco
337             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
338          }          }
339       }       }
340    #endif
341    
342    }    }
343    TMPMEMFREE(counter);    TMPMEMFREE(counter);
344    TMPMEMFREE(split_marker);    TMPMEMFREE(F_marker);
345    TMPMEMFREE(degree);    TMPMEMFREE(degree_S);
346      TMPMEMFREE(offset_S);
347    TMPMEMFREE(S);    TMPMEMFREE(S);
348      }
349    
350    if (Esys_noError()) {    if (Esys_noError()) {
351       return out;       return out;
352    } else  {    } else  {
353       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_AMG_free(out);
354       return NULL;       return NULL;
355    }    }
356  }  }
357    
358    
359  void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
360       const dim_t n = amg->n * amg->n_block;       const dim_t n = amg->n * amg->n_block;
361       double time0=0;       double time0=0;
362       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
# Line 291  void Paso_Preconditioner_LocalAMG_solve( Line 364  void Paso_Preconditioner_LocalAMG_solve(
364    
365       /* presmoothing */       /* presmoothing */
366       time0=Esys_timer();       time0=Esys_timer();
367       Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
368       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
369       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
370       /* end of presmoothing */       /* end of presmoothing */
# Line 299  void Paso_Preconditioner_LocalAMG_solve( Line 372  void Paso_Preconditioner_LocalAMG_solve(
372       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       if (amg->n_F < amg->n) { /* is there work on the coarse level? */
373           time0=Esys_timer();           time0=Esys_timer();
374       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
375       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
376       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
377           time0=Esys_timer()-time0;           time0=Esys_timer()-time0;
378       /* coarse level solve */       /* coarse level solve */
379       if ( amg->AMG_C == NULL) {       if ( amg->AMG_C == NULL) {
380          time0=Esys_timer();          time0=Esys_timer();
381          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
382    #ifdef FIXME
383          switch (amg->A_C->solver_package) {          switch (amg->A_C->solver_package) {
384             case (PASO_MKL):             case (PASO_MKL):
385            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
# Line 314  void Paso_Preconditioner_LocalAMG_solve( Line 388  void Paso_Preconditioner_LocalAMG_solve(
388            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
389            break;            break;
390             case (PASO_SMOOTHER):             case (PASO_SMOOTHER):
391            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
392            break;            break;
393          }          }
394    #endif
395            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
396          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
397       } else {       } else {
398          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
399       }         }  
400       time0=time0+Esys_timer();       time0=time0+Esys_timer();
401       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
402            
403           /*postsmoothing*/           /*postsmoothing*/
404                
405          /*solve Ax=b with initial guess x */          /*solve Ax=b with initial guess x */
406          time0=Esys_timer();          time0=Esys_timer();
407          Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);          Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
408          time0=Esys_timer()-time0;          time0=Esys_timer()-time0;
409          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
410          /*end of postsmoothing*/          /*end of postsmoothing*/
# Line 345  void Paso_Preconditioner_LocalAMG_solve( Line 421  void Paso_Preconditioner_LocalAMG_solve(
421  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
422  */  */
423    
424  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
425                        dim_t *degree, index_t *S,                            dim_t *degree_S, index_t *offset_S, index_t *S,
426                        const double theta, const double tau)                        const double theta, const double tau)
427  {  {
428     const dim_t n=A->numRows;     const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
429     index_t iptr, i,j;     index_t iptr, i;
    dim_t kdeg;  
    double max_offdiagonal, threshold, sum_row, main_row, fnorm;  
430    
431    
432        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)        #pragma omp parallel for private(i,iptr) schedule(static)
433        for (i=0;i<n;++i) {                for (i=0;i<my_n;++i) {        
434       max_offdiagonal = 0.;       register double max_offdiagonal = 0.;
435       sum_row=0;       register double sum_row=0;
436       main_row=0;       register double main_row=0;
437         register dim_t kdeg=0;
438             register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
439       #pragma ivdep       #pragma ivdep
440       for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
441          j=A->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
442          fnorm=ABS(A->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
443                    
444          if( j != i) {          if( j != i) {
445             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
# Line 371  void Paso_Preconditioner_AMG_setStrongCo Line 447  void Paso_Preconditioner_AMG_setStrongCo
447          } else {          } else {
448             main_row=fnorm;             main_row=fnorm;
449          }          }
450    
451       }       }
452       threshold = theta*max_offdiagonal;       #pragma ivdep
453       kdeg=0;       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
454                register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
455       if (tau*main_row < sum_row) { /* no diagonal domainance */          max_offdiagonal = MAX(max_offdiagonal,fnorm);
456          #pragma ivdep          sum_row+=fnorm;
         for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {  
            j=A->pattern->index[iptr];  
            if(ABS(A->val[iptr])>threshold && i!=j) {  
           S[A->pattern->ptr[i]+kdeg] = j;  
           kdeg++;  
            }  
         }  
457       }       }
      degree[i]=kdeg;  
       }  
458    
459             {
460            const double threshold = theta*max_offdiagonal;
461            if (tau*main_row < sum_row) { /* no diagonal domainance */
462               #pragma ivdep
463               for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
464                  register index_t j=A->mainBlock->pattern->index[iptr];
465                  if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
466                 S[koffset+kdeg] = j;
467                 kdeg++;
468                  }
469               }
470               #pragma ivdep
471               for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
472                  register index_t j=A->col_coupleBlock->pattern->index[iptr];
473                  if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
474                 S[koffset+kdeg] = j + my_n;
475                 kdeg++;
476                  }
477               }
478                }
479             }
480             offset_S[i]=koffset;
481         degree_S[i]=kdeg;
482          }
483  }  }
484    
485  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 396  void Paso_Preconditioner_AMG_setStrongCo Line 488  void Paso_Preconditioner_AMG_setStrongCo
488    
489  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
490  */  */
491  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
492                              dim_t *degree, index_t *S,                              dim_t *degree_S, index_t *offset_S, index_t *S,
493                              const double theta, const double tau)                              const double theta, const double tau)
494    
495  {  {
496       const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
497       index_t iptr, i, bi;
498     const dim_t n_block=A->row_block_size;     const dim_t n_block=A->row_block_size;
    const dim_t n=A->numRows;  
    index_t iptr, i,j, bi;  
    dim_t kdeg, max_deg;  
    register double max_offdiagonal, threshold, fnorm, sum_row, main_row;  
    double *rtmp;  
499        
500        
501        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)        #pragma omp parallel private(i,iptr, bi)
502        {        {
503       max_deg=0;       dim_t max_deg=0;
504         double *rtmp=TMPMEMALLOC(max_deg, double);
505    
506       #pragma omp for schedule(static)       #pragma omp for schedule(static)
507       for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);       for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
508                                                    +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
509                
      rtmp=TMPMEMALLOC(max_deg, double);  
510                
511       #pragma omp for schedule(static)       #pragma omp for schedule(static)
512       for (i=0;i<n;++i) {       for (i=0;i<my_n;++i) {
513            
514          max_offdiagonal = 0.;          register double max_offdiagonal = 0.;
515          sum_row=0;          register double sum_row=0;
516          main_row=0;          register double main_row=0;
517          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {              register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
518             j=A->pattern->index[iptr];          register dim_t kdeg=0;
519             fnorm=0;              register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
520    
521            for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
522               register index_t j=A->mainBlock->pattern->index[iptr];
523               register double fnorm=0;
524             #pragma ivdep             #pragma ivdep
525             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];             for(bi=0;bi<n_block*n_block;++bi) {
526                         register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
527                         fnorm+=rtmp2*rtmp2;
528                   }
529             fnorm=sqrt(fnorm);             fnorm=sqrt(fnorm);
530             rtmp[iptr-A->pattern->ptr[i]]=fnorm;  
531               rtmp[iptr+rtmp_offset]=fnorm;
532             if( j != i) {             if( j != i) {
533            max_offdiagonal = MAX(max_offdiagonal,fnorm);            max_offdiagonal = MAX(max_offdiagonal,fnorm);
534            sum_row+=fnorm;            sum_row+=fnorm;
# Line 437  void Paso_Preconditioner_AMG_setStrongCo Line 536  void Paso_Preconditioner_AMG_setStrongCo
536            main_row=fnorm;            main_row=fnorm;
537             }             }
538          }          }
539          threshold = theta*max_offdiagonal;              rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
540                  for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
541          kdeg=0;             register double fnorm=0;
         if (tau*main_row < sum_row) { /* no diagonal domainance */  
542             #pragma ivdep             #pragma ivdep
543             for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {             for(bi=0;bi<n_block*n_block;++bi) {
544            j=A->pattern->index[iptr];                       register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
545            if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {                       fnorm+=rtmp2*rtmp2;
546               S[A->pattern->ptr[i]+kdeg] = j;                 }
547               kdeg++;             fnorm=sqrt(fnorm);
548            }  
549             }             rtmp[iptr+rtmp_offset]=fnorm;
550               max_offdiagonal = MAX(max_offdiagonal,fnorm);
551               sum_row+=fnorm;
552          }          }
553          degree[i]=kdeg;              {
554                const double threshold = theta*max_offdiagonal;
555    
556                if (tau*main_row < sum_row) { /* no diagonal domainance */
557                       rtmp_offset=-A->mainBlock->pattern->ptr[i];
558                   #pragma ivdep
559                   for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
560                  register index_t j=A->mainBlock->pattern->index[iptr];
561                  if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
562                     S[koffset+kdeg] = j;
563                     kdeg++;
564                  }
565                   }
566                       rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
567                   #pragma ivdep
568                   for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
569                  register index_t j=A->col_coupleBlock->pattern->index[iptr];
570                  if(rtmp[iptr+rtmp_offset] > threshold) {
571                     S[koffset+kdeg] = j + my_n;
572                     kdeg++;
573                  }
574                   }
575                }
576                }
577            degree_S[i]=kdeg;
578                offset_S[i]=koffset;
579       }             }      
580       TMPMEMFREE(rtmp);       TMPMEMFREE(rtmp);
581        } /* end of parallel region */        } /* end of parallel region */
582    
583  }    }  
584    
585    #ifdef AAAAA
586  /* the runge stueben coarsening algorithm: */  /* the runge stueben coarsening algorithm: */
587  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
588                          const dim_t* degree, const index_t* S,                          const dim_t* degree_S, const index_t* S,
589                          index_t*split_marker, const bool_t usePanel)                          index_t*split_marker, const bool_t usePanel)
590  {  {
591        
592     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
593     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;     dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
594     register index_t j, itmp;     register index_t j, itmp;
595        
596     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
597        
598     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
599     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);     degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
600     ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);     ST=TMPMEMALLOC(offset_S[n], index_t);  Esys_checkPtr(ST);
601     if (usePanel) {     if (usePanel) {
602        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
603        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
# Line 484  void Paso_Preconditioner_AMG_RungeStuebe Line 610  void Paso_Preconditioner_AMG_RungeStuebe
610        /* those unknows which are not influenced go into F, the rest is available for F or C */        /* those unknows which are not influenced go into F, the rest is available for F or C */
611        #pragma omp parallel for private(i) schedule(static)        #pragma omp parallel for private(i) schedule(static)
612        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
613       degreeT[i]=0;       degree_ST[i]=0;
614       if (degree[i]>0) {       if (degree_S[i]>0) {
615          lambda[i]=0;          lambda[i]=0;
616          split_marker[i]=PASO_AMG_UNDECIDED;          split_marker[i]=PASO_AMG_UNDECIDED;
617       } else {       } else {
# Line 495  void Paso_Preconditioner_AMG_RungeStuebe Line 621  void Paso_Preconditioner_AMG_RungeStuebe
621        }        }
622        /* create transpose :*/        /* create transpose :*/
623        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
624          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
625             j=S[offset[i]+p];             j=S[offset_S[i]+p];
626             ST[offset[j]+degreeT[j]]=i;             ST[offset_S[j]+degree_ST[j]]=i;
627             degreeT[j]++;             degree_ST[j]++;
628          }          }
629        }        }
630        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
# Line 506  void Paso_Preconditioner_AMG_RungeStuebe Line 632  void Paso_Preconditioner_AMG_RungeStuebe
632        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
633       if (split_marker[i]==PASO_AMG_UNDECIDED) {       if (split_marker[i]==PASO_AMG_UNDECIDED) {
634          itmp=lambda[i];          itmp=lambda[i];
635          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
636             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
637             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
638            itmp++;            itmp++;
639             } else {  /* at this point there are no C points */             } else {  /* at this point there are no C points */
# Line 533  void Paso_Preconditioner_AMG_RungeStuebe Line 659  void Paso_Preconditioner_AMG_RungeStuebe
659             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
660                        
661             /* all undecided unknown strongly coupled to i are moved to F */             /* all undecided unknown strongly coupled to i are moved to F */
662             for (p=0; p<degreeT[i]; ++p) {             for (p=0; p<degree_ST[i]; ++p) {
663            j=ST[offset[i]+p];            j=ST[offset_S[i]+p];
664                        
665            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
666                            
667               split_marker[j]=PASO_AMG_IN_F;               split_marker[j]=PASO_AMG_IN_F;
668               lambda[j]=-1;               lambda[j]=-1;
669                            
670               for (q=0; q<degreeT[j]; ++q) {               for (q=0; q<degree_ST[j]; ++q) {
671              k=ST[offset[j]+q];              k=ST[offset_S[j]+q];
672              if (split_marker[k]==PASO_AMG_UNDECIDED) {              if (split_marker[k]==PASO_AMG_UNDECIDED) {
673                 lambda[k]++;                 lambda[k]++;
674                 if (notInPanel[k]) {                 if (notInPanel[k]) {
# Line 558  void Paso_Preconditioner_AMG_RungeStuebe Line 684  void Paso_Preconditioner_AMG_RungeStuebe
684                            
685            }            }
686             }             }
687             for (p=0; p<degree[i]; ++p) {             for (p=0; p<degree_S[i]; ++p) {
688            j=S[offset[i]+p];            j=S[offset_S[i]+p];
689            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
690               lambda[j]--;               lambda[j]--;
691               if (notInPanel[j]) {               if (notInPanel[j]) {
# Line 598  void Paso_Preconditioner_AMG_RungeStuebe Line 724  void Paso_Preconditioner_AMG_RungeStuebe
724          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
725                    
726          /* all undecided unknown strongly coupled to i are moved to F */          /* all undecided unknown strongly coupled to i are moved to F */
727          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
728             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
729             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
730                    
731            split_marker[j]=PASO_AMG_IN_F;            split_marker[j]=PASO_AMG_IN_F;
732            lambda[j]=-1;            lambda[j]=-1;
733                    
734            for (q=0; q<degreeT[j]; ++q) {            for (q=0; q<degree_ST[j]; ++q) {
735               k=ST[offset[j]+q];               k=ST[offset_S[j]+q];
736               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
737            }            }
738    
739             }             }
740          }          }
741          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
742             j=S[offset[i]+p];             j=S[offset_S[i]+p];
743             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
744          }          }
745                    
746       }       }
747       i=Paso_Util_arg_max(n,lambda);       i=Paso_Util_arg_max(n,lambda);
748        }        }
749              
750     }     }
751     TMPMEMFREE(lambda);     TMPMEMFREE(lambda);
752     TMPMEMFREE(ST);     TMPMEMFREE(ST);
753     TMPMEMFREE(degreeT);     TMPMEMFREE(degree_ST);
754     TMPMEMFREE(panel);     TMPMEMFREE(panel);
755     TMPMEMFREE(notInPanel);     TMPMEMFREE(notInPanel);
756  }  }
757    /* ensures that two F nodes are connected via a C node :*/
758    void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
759                            const dim_t* degree_S, const index_t* S,
760                            index_t*split_marker)
761    {
762          dim_t i, p, q;
763    
764          /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
765          for (i=0;i<n;++i) {
766                if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
767               for (p=0; p<degree_S[i]; ++p) {
768                      register index_t j=S[offset_S[i]+p];
769                      if ( (split_marker[j]==PASO_AMG_IN_F)  && (degree_S[j]>0) )  {
770                          /* i and j are now two F nodes which are strongly connected */
771                          /* is there a C node they share ? */
772                          register index_t sharing=-1;
773                      for (q=0; q<degree_S[i]; ++q) {
774                             index_t k=S[offset_S[i]+q];
775                             if (split_marker[k]==PASO_AMG_IN_C) {
776                                register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
777                                if (where_k != NULL) {
778                                   sharing=k;
779                                   break;
780                                }
781                             }
782                          }
783                          if (sharing<0) {
784                               if (i<j) {
785                                  split_marker[j]=PASO_AMG_IN_C;
786                               } else {
787                                  split_marker[i]=PASO_AMG_IN_C;
788                                  break;  /* no point to look any further as i is now a C node */
789                               }
790                          }
791                      }
792                   }
793                }
794           }
795    }
796    #endif
797    
798    #ifdef DFG
799    void Paso_Preconditioner_AMG_CIJPCoarsening( )
800    {
801    
802      
803      const dim_t my_n;
804      const dim_t overlap_n;
805      const dim_t n= my_n + overlap_n;
806    
807    
808          /* initialize  split_marker and split_marker :*/
809          /* those unknows which are not influenced go into F, the rest is available for F or C */
810          #pragma omp parallel for private(i) schedule(static)
811          for (i=0;i<n;++i) {
812         degree_ST[i]=0;
813         if (degree_S[i]>0) {
814            lambda[i]=0;
815            split_marker[i]=PASO_AMG_UNDECIDED;
816         } else {
817            split_marker[i]=PASO_AMG_IN_F;
818            lambda[i]=-1;
819         }
820          }
821    
822       /* set local lambda + overlap */
823       #pragma omp parallel for private(i)
824       for (i=0; i<n ++i) {
825           w[i]=degree_ST[i];
826       }
827       for (i=0; i<my_n; i++) {
828          w2[i]=random;
829       }
830    
831    
832       /* add noise to w */
833       Paso_Coupler_add(n, w, 1., w2, col_coupler);
834    
835       /*  */
836       global_n_C=0;
837       global_n_F=..;
838      
839       while (global_n_C + global_n_F < global_n) {
840    
841          
842          is_in_D[i]=FALSE;
843          /*  test  local connectivit*/
844          /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
845          #pragma omp parallel for private(i)
846          for (i=0; i<n; ++i) w2[i]=0;
847                
848          for (i=0; i<my_n; ++i) {
849             for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
850                 k=S[offset_S[i]+iPtr];
851                 w2[i]=MAX(w2[i],w[k]);
852                 w2[k]=MAX(w2[k],w[i]);
853             }
854          }
855          /* adjust overlaps by MAX */
856          Paso_Coupler_max(n, w2, col_coupler);
857    
858          /* points with w[i]>w2[i] become C nodes */
859       }
860      
861    }
862    #endif

Legend:
Removed from v.3403  
changed lines
  Added in v.3458

  ViewVC Help
Powered by ViewVC 1.1.26