/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3352 by gross, Tue Nov 16 03:58:09 2010 UTC revision 3450 by gross, Fri Jan 21 06:39:56 2011 UTC
# Line 14  Line 14 
14    
15  /**************************************************************/  /**************************************************************/
16    
17  /* Paso: AMG preconditioner                                  */  /* Paso: AMG preconditioner  (local version)                  */
18    
19  /**************************************************************/  /**************************************************************/
20    
# Line 35  Line 35 
35    
36  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
37    
38  void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {  void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39       if (in!=NULL) {       if (in!=NULL) {
40      Paso_Preconditioner_LocalSmoother_free(in->Smoother);      Paso_Preconditioner_Smoother_free(in->Smoother);
41      Paso_SparseMatrix_free(in->P);      Paso_SystemMatrix_free(in->P);
42      Paso_SparseMatrix_free(in->R);      Paso_SystemMatrix_free(in->R);
43      Paso_SparseMatrix_free(in->A_C);      Paso_SystemMatrix_free(in->A_C);
44      Paso_Preconditioner_LocalAMG_free(in->AMG_C);      Paso_Preconditioner_AMG_free(in->AMG_C);
45      MEMFREE(in->r);      MEMFREE(in->r);
46      MEMFREE(in->x_C);      MEMFREE(in->x_C);
47      MEMFREE(in->b_C);      MEMFREE(in->b_C);
48    
       
49      MEMFREE(in);      MEMFREE(in);
50       }       }
51  }  }
52    
53    index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54       if (in->AMG_C == NULL) {
55          return in->level;
56       } else {
57          return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58       }
59    }
60    double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61          if (in->AMG_C == NULL) {
62         if (in->A_C == NULL) {
63            return 1.;
64         } else {
65            return Paso_SystemMatrix_getSparsity(in->A_C);
66         }
67          } else {
68            return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69          }
70    }
71    dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72       if (in->AMG_C == NULL) {
73          if (in->A_C == NULL) {
74         return 0;
75          } else {
76         return Paso_SystemMatrix_getTotalNumRows(in->A_C);
77          }
78       } else {
79         return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80       }
81    }
82  /*****************************************************************  /*****************************************************************
83    
84     constructs AMG     constructs AMG
85        
86  ******************************************************************/  ******************************************************************/
87  Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88    
89    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
90    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
91      
92    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;    const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
93    const dim_t n=A_p->numRows;    const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
94      const dim_t n = my_n + overlap_n;
95    
96    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
97    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL;
98    dim_t n_F=0, n_C=0, i;    dim_t i;
99    double time0=0;    double time0=0;
100    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
101    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
102        const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
103      const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
104    
105        
106    /*    /*
107        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening
108                
109    */    */
110    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (sparsity >= options->min_coarse_sparsity) ||
111       if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",         (total_n <= options->min_coarse_matrix_size) ||
112      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );           (level > options->level_max) ) {
113       return NULL;  
114    }          if (verbose) {
115       /* Start Coarsening : */            /*
116                  print stopping condition:
117                          - 'SPAR' = min_coarse_matrix_sparsity exceeded
118                          - 'SIZE' = min_coarse_matrix_size exceeded
119                          - 'LEVEL' = level_max exceeded
120              */
121              printf("Paso_Preconditioner: AMG: termination of coarsening by ");
122    
123              if (sparsity >= options->min_coarse_sparsity)
124                  printf("SPAR ");
125    
126              if (total_n <= options->min_coarse_matrix_size)
127                  printf("SIZE ");
128    
129              if (level > options->level_max)
130                  printf("LEVEL ");
131    
132              printf("\n");
133    
134       split_marker=TMPMEMALLOC(n,index_t);          printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
135               level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);  
136    
137           }
138    
139           return NULL;
140      }  else {
141          /* Start Coarsening : */
142          const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
143    
144         F_marker=TMPMEMALLOC(n,index_t);
145       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
146       degree=TMPMEMALLOC(n, dim_t);  
147       S=TMPMEMALLOC(A_p->pattern->len, index_t);       dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
148       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {       index_t *offset_S=TMPMEMALLOC(my_n, index_t);
149         index_t *S=TMPMEMALLOC(len_S, index_t);
150         if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
151       /*       /*
152            set splitting of unknows:            set splitting of unknows:
153                
154           */           */
155       time0=Esys_timer();       time0=Esys_timer();
156       if (n_block>1) {       if (n_block>1) {
157             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
158       } else {       } else {
159             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
160       }       }
161       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);  {
162       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);     dim_t p;
163       for (i=0; i< my_n; ++i) {
164             printf("%d: ",i);
165            for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
166            printf("\n");
167       }
168    }
169          
170    
171    /*MPI:
172         Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
173    */
174            
175             /* in BoomerAMG interpolation is used FF connectiovity is required :*/
176    /*MPI:
177             if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
178                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
179    */
180    
181         options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
182    
183    #ifdef AAAAA
184       if (Esys_noError() ) {       if (Esys_noError() ) {
185          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
186          for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
187            
188          /*          /*
189             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
190          */          */
191          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
192          n_C=n-n_F;          n_C=n-n_F;
193          if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
194            
195          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
196             out = NULL;             out = NULL;
197          } else {          } else {
198             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
199             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
200            out->level = level;            out->level = level;
201            out->n = n;            out->n = n;
# Line 137  Paso_Preconditioner_LocalAMG* Paso_Preco Line 218  Paso_Preconditioner_LocalAMG* Paso_Preco
218             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
219             if ( Esys_noError() ) {             if ( Esys_noError() ) {
220    
221            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
222                
223            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if (n_C != 0) {
224                   /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
225        
226              /* allocate helpers :*/              /* allocate helpers :*/
227              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*n_C,double);
# Line 156  Paso_Preconditioner_LocalAMG* Paso_Preco Line 238  Paso_Preconditioner_LocalAMG* Paso_Preco
238                 {                 {
239                    #pragma omp for schedule(static)                    #pragma omp for schedule(static)
240                    for (i = 0; i < n; ++i) {                    for (i = 0; i < n; ++i) {
241                   if  (split_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
242                    }                    }
243                 }                 }
244                 /*  create mask of C nodes with value >-1 gives new id */                 /*  create mask of C nodes with value >-1 gives new id */
245                 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);                 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
246    
247                 #pragma omp parallel for private(i) schedule(static)                 #pragma omp parallel for private(i) schedule(static)
248                 for (i = 0; i < n; ++i) {                 for (i = 0; i < n; ++i) {
249                    if  (split_marker[i]) {                    if  (F_marker[i]) {
250                   mask_C[i]=-1;                   mask_C[i]=-1;
251                    } else {                    } else {
252                   mask_C[i]=counter[i];;                   mask_C[i]=counter[i];;
253                    }                    }
254                 }                 }
255                 /*                 /*
256                    get Restriction :                      get Prolongation :    
257                 */                                   */                  
258                 time0=Esys_timer();                 time0=Esys_timer();
259                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);  /*MPI:
260                   out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
261    */
262                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
263              }              }
264              /*                    /*      
265                 construct Prolongation operator as transposed of restriction operator:                 construct Restriction operator as transposed of Prolongation operator:
266              */              */
267              if ( Esys_noError()) {              if ( Esys_noError()) {
268                 time0=Esys_timer();                 time0=Esys_timer();
269                 out->R=Paso_SparseMatrix_getTranspose(out->P);  /*MPI:
270                 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 out->R=Paso_SystemMatrix_getTranspose(out->P);
271    */
272                   if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
273              }                    }      
274              /*              /*
275              construct coarse level matrix:              construct coarse level matrix:
276              */              */
277              if ( Esys_noError()) {              if ( Esys_noError()) {
278                 time0=Esys_timer();                 time0=Esys_timer();
279                 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  /*MPI:
280                 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
281                 Paso_SparseMatrix_free(Atemp);                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
282                  
283                   Paso_SystemMatrix_free(Atemp);
284    */
285    
286                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                             if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);            
287              }              }
288    
# Line 202  Paso_Preconditioner_LocalAMG* Paso_Preco Line 292  Paso_Preconditioner_LocalAMG* Paso_Preco
292                                
293              */              */
294              if ( Esys_noError()) {              if ( Esys_noError()) {
295                 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
296              }              }
297              if ( Esys_noError()) {              if ( Esys_noError()) {
298                 if ( out->AMG_C == NULL ) {                 if ( out->AMG_C == NULL ) {
# Line 210  Paso_Preconditioner_LocalAMG* Paso_Preco Line 300  Paso_Preconditioner_LocalAMG* Paso_Preco
300                    out->refinements = options->coarse_matrix_refinements;                    out->refinements = options->coarse_matrix_refinements;
301                    /* no coarse level matrix has been constructed. use direct solver */                    /* no coarse level matrix has been constructed. use direct solver */
302                    #ifdef MKL                    #ifdef MKL
303                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
304                      Paso_SparseMatrix_free(A_C);                      Paso_SystemMatrix_free(A_C);
305                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
306                      if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
307                    #else                    #else
308                      #ifdef UMFPACK                      #ifdef UMFPACK
309                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);                         out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
310                         Paso_SparseMatrix_free(A_C);                         Paso_SystemMatrix_free(A_C);
311                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
312                         if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
313                      #else                      #else
314                         out->A_C=A_C;                         out->A_C=A_C;
315                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
316                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
317                         if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
318                      #endif                      #endif
319                    #endif                    #endif
320                 } else {                 } else {
# Line 238  Paso_Preconditioner_LocalAMG* Paso_Preco Line 328  Paso_Preconditioner_LocalAMG* Paso_Preco
328             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
329          }          }
330       }       }
331    #endif
332    
333    }    }
334    TMPMEMFREE(counter);    TMPMEMFREE(counter);
335    TMPMEMFREE(split_marker);    TMPMEMFREE(F_marker);
336    TMPMEMFREE(degree);    TMPMEMFREE(degree_S);
337      TMPMEMFREE(offset_S);
338    TMPMEMFREE(S);    TMPMEMFREE(S);
339      }
340    
341    if (Esys_noError()) {    if (Esys_noError()) {
342       return out;       return out;
343    } else  {    } else  {
344       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_AMG_free(out);
345       return NULL;       return NULL;
346    }    }
347  }  }
348    
349    
350  void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
351       const dim_t n = amg->n * amg->n_block;       const dim_t n = amg->n * amg->n_block;
352       double time0=0;       double time0=0;
353       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
# Line 261  void Paso_Preconditioner_LocalAMG_solve( Line 355  void Paso_Preconditioner_LocalAMG_solve(
355    
356       /* presmoothing */       /* presmoothing */
357       time0=Esys_timer();       time0=Esys_timer();
358       Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
359       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
360       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
361       /* end of presmoothing */       /* end of presmoothing */
# Line 269  void Paso_Preconditioner_LocalAMG_solve( Line 363  void Paso_Preconditioner_LocalAMG_solve(
363       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       if (amg->n_F < amg->n) { /* is there work on the coarse level? */
364           time0=Esys_timer();           time0=Esys_timer();
365       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
366       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
367       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
368           time0=Esys_timer()-time0;           time0=Esys_timer()-time0;
369       /* coarse level solve */       /* coarse level solve */
370       if ( amg->AMG_C == NULL) {       if ( amg->AMG_C == NULL) {
371          time0=Esys_timer();          time0=Esys_timer();
372          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
373    #ifdef FIXME
374          switch (amg->A_C->solver_package) {          switch (amg->A_C->solver_package) {
375             case (PASO_MKL):             case (PASO_MKL):
376            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
# Line 284  void Paso_Preconditioner_LocalAMG_solve( Line 379  void Paso_Preconditioner_LocalAMG_solve(
379            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
380            break;            break;
381             case (PASO_SMOOTHER):             case (PASO_SMOOTHER):
382            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
383            break;            break;
384          }          }
385    #endif
386            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
387          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
388       } else {       } else {
389          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
390       }         }  
391       time0=time0+Esys_timer();       time0=time0+Esys_timer();
392       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
393            
394           /*postsmoothing*/           /*postsmoothing*/
395                
396          /*solve Ax=b with initial guess x */          /*solve Ax=b with initial guess x */
397          time0=Esys_timer();          time0=Esys_timer();
398          Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);          Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
399          time0=Esys_timer()-time0;          time0=Esys_timer()-time0;
400          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
401          /*end of postsmoothing*/          /*end of postsmoothing*/
# Line 315  void Paso_Preconditioner_LocalAMG_solve( Line 412  void Paso_Preconditioner_LocalAMG_solve(
412  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
413  */  */
414    
415  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
416                        dim_t *degree, index_t *S,                            dim_t *degree_S, index_t *offset_S, index_t *S,
417                        const double theta, const double tau)                        const double theta, const double tau)
418  {  {
419     const dim_t n=A->numRows;     const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
420     index_t iptr, i,j;     index_t iptr, i;
    dim_t kdeg;  
    double max_offdiagonal, threshold, sum_row, main_row, fnorm;  
421    
422    
423        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)        #pragma omp parallel for private(i,iptr) schedule(static)
424        for (i=0;i<n;++i) {        for (i=0;i<my_n;++i) {        
425                   register double max_offdiagonal = 0.;
426       max_offdiagonal = 0.;       register double sum_row=0;
427       sum_row=0;       register double main_row=0;
      main_row=0;  
428       #pragma ivdep       #pragma ivdep
429       for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
430          j=A->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
431          fnorm=ABS(A->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
432                    
433          if( j != i) {          if( j != i) {
434             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
# Line 342  void Paso_Preconditioner_AMG_setStrongCo Line 436  void Paso_Preconditioner_AMG_setStrongCo
436          } else {          } else {
437             main_row=fnorm;             main_row=fnorm;
438          }          }
439    
440       }       }
441       threshold = theta*max_offdiagonal;       #pragma ivdep
442       kdeg=0;       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
443            register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
444            max_offdiagonal = MAX(max_offdiagonal,fnorm);
445            sum_row+=fnorm;
446         }
447    
448    
449         const double threshold = theta*max_offdiagonal;
450         register dim_t kdeg=0;
451             register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
452       if (tau*main_row < sum_row) { /* no diagonal domainance */       if (tau*main_row < sum_row) { /* no diagonal domainance */
453          #pragma ivdep          #pragma ivdep
454          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
455             j=A->pattern->index[iptr];             register index_t j=A->mainBlock->pattern->index[iptr];
456             if(ABS(A->val[iptr])>threshold && i!=j) {             if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
457            S[A->pattern->ptr[i]+kdeg] = j;            S[koffset+kdeg] = j;
458            kdeg++;            kdeg++;
459             }             }
460          }          }
461       }          #pragma ivdep
462       degree[i]=kdeg;          for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
463               register index_t j=A->col_coupleBlock->pattern->index[iptr];
464               if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
465              S[koffset+kdeg] = j + my_n;
466              kdeg++;
467               }
468            }
469             }
470             offset_S[i]=koffset;
471         degree_S[i]=kdeg;
472        }        }
   
473  }  }
474    
475  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 366  void Paso_Preconditioner_AMG_setStrongCo Line 478  void Paso_Preconditioner_AMG_setStrongCo
478    
479  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
480  */  */
481  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
482                              dim_t *degree, index_t *S,                              dim_t *degree_S, index_t *offset_S, index_t *S,
483                              const double theta, const double tau)                              const double theta, const double tau)
484    
485  {  {
486       const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
487       index_t iptr, i, bi;
488     const dim_t n_block=A->row_block_size;     const dim_t n_block=A->row_block_size;
    const dim_t n=A->numRows;  
    index_t iptr, i,j, bi;  
    dim_t kdeg, max_deg;  
    register double max_offdiagonal, threshold, fnorm, sum_row, main_row;  
    double *rtmp;  
489        
490        
491        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)        #pragma omp parallel private(i,iptr, bi)
492        {        {
493       max_deg=0;       dim_t max_deg=0;
494       #pragma omp for schedule(static)       #pragma omp for schedule(static)
495       for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);       for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
496                                                    +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
497                
498       rtmp=TMPMEMALLOC(max_deg, double);       double *rtmp=TMPMEMALLOC(max_deg, double);
499                
500       #pragma omp for schedule(static)       #pragma omp for schedule(static)
501       for (i=0;i<n;++i) {       for (i=0;i<my_n;++i) {
502            
503          max_offdiagonal = 0.;          register double max_offdiagonal = 0.;
504          sum_row=0;          register double sum_row=0;
505          main_row=0;          register double main_row=0;
506          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {              register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
507             j=A->pattern->index[iptr];  
508             fnorm=0;          for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
509               register index_t j=A->mainBlock->pattern->index[iptr];
510               register double fnorm=0;
511             #pragma ivdep             #pragma ivdep
512             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];             for(bi=0;bi<n_block*n_block;++bi) {
513                         register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
514                         fnorm+=rtmp2*rtmp2;
515                   }
516             fnorm=sqrt(fnorm);             fnorm=sqrt(fnorm);
517             rtmp[iptr-A->pattern->ptr[i]]=fnorm;  
518               rtmp[iptr+rtmp_offset]=fnorm;
519             if( j != i) {             if( j != i) {
520            max_offdiagonal = MAX(max_offdiagonal,fnorm);            max_offdiagonal = MAX(max_offdiagonal,fnorm);
521            sum_row+=fnorm;            sum_row+=fnorm;
# Line 407  void Paso_Preconditioner_AMG_setStrongCo Line 523  void Paso_Preconditioner_AMG_setStrongCo
523            main_row=fnorm;            main_row=fnorm;
524             }             }
525          }          }
526          threshold = theta*max_offdiagonal;              rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
527                  for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
528          kdeg=0;             register double fnorm=0;
529               #pragma ivdep
530               for(bi=0;bi<n_block*n_block;++bi) {
531                         register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
532                         fnorm+=rtmp2*rtmp2;
533                   }
534               fnorm=sqrt(fnorm);
535    
536               rtmp[iptr+rtmp_offset]=fnorm;
537               max_offdiagonal = MAX(max_offdiagonal,fnorm);
538               sum_row+=fnorm;
539            }
540                
541            const double threshold = theta*max_offdiagonal;
542            register dim_t kdeg=0;
543                register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
544    
545          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal domainance */
546                   rtmp_offset=-A->mainBlock->pattern->ptr[i];
547               #pragma ivdep
548               for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
549              register index_t j=A->mainBlock->pattern->index[iptr];
550              if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
551                 S[koffset+kdeg] = j;
552                 kdeg++;
553              }
554               }
555                   rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
556             #pragma ivdep             #pragma ivdep
557             for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
558            j=A->pattern->index[iptr];            register index_t j=A->col_coupleBlock->pattern->index[iptr];
559            if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {            if(rtmp[iptr+rtmp_offset] > threshold) {
560               S[A->pattern->ptr[i]+kdeg] = j;               S[koffset+kdeg] = j + my_n;
561               kdeg++;               kdeg++;
562            }            }
563             }             }
564                  
565          }          }
566          degree[i]=kdeg;          degree_S[i]=kdeg;
567                offset_S[i]=koffset;
568       }             }      
569       TMPMEMFREE(rtmp);       TMPMEMFREE(rtmp);
570        } /* end of parallel region */        } /* end of parallel region */
571    
572  }    }  
573    
574    #ifdef AAAAA
575  /* the runge stueben coarsening algorithm: */  /* the runge stueben coarsening algorithm: */
576  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
577                          const dim_t* degree, const index_t* S,                          const dim_t* degree_S, const index_t* S,
578                          index_t*split_marker, const bool_t usePanel)                          index_t*split_marker, const bool_t usePanel)
579  {  {
580        
581     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
582     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;     dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
583     register index_t j, itmp;     register index_t j, itmp;
584        
585     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
586        
587     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);     lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
588     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);     degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
589     ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);     ST=TMPMEMALLOC(offset_S[n], index_t);  Esys_checkPtr(ST);
590     if (usePanel) {     if (usePanel) {
591        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);        notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
592        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);        panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
# Line 454  void Paso_Preconditioner_AMG_RungeStuebe Line 599  void Paso_Preconditioner_AMG_RungeStuebe
599        /* those unknows which are not influenced go into F, the rest is available for F or C */        /* those unknows which are not influenced go into F, the rest is available for F or C */
600        #pragma omp parallel for private(i) schedule(static)        #pragma omp parallel for private(i) schedule(static)
601        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
602       degreeT[i]=0;       degree_ST[i]=0;
603       if (degree[i]>0) {       if (degree_S[i]>0) {
604          lambda[i]=0;          lambda[i]=0;
605          split_marker[i]=PASO_AMG_UNDECIDED;          split_marker[i]=PASO_AMG_UNDECIDED;
606       } else {       } else {
# Line 465  void Paso_Preconditioner_AMG_RungeStuebe Line 610  void Paso_Preconditioner_AMG_RungeStuebe
610        }        }
611        /* create transpose :*/        /* create transpose :*/
612        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
613          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
614             j=S[offset[i]+p];             j=S[offset_S[i]+p];
615             ST[offset[j]+degreeT[j]]=i;             ST[offset_S[j]+degree_ST[j]]=i;
616             degreeT[j]++;             degree_ST[j]++;
617          }          }
618        }        }
619        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
# Line 476  void Paso_Preconditioner_AMG_RungeStuebe Line 621  void Paso_Preconditioner_AMG_RungeStuebe
621        for (i=0;i<n;++i) {        for (i=0;i<n;++i) {
622       if (split_marker[i]==PASO_AMG_UNDECIDED) {       if (split_marker[i]==PASO_AMG_UNDECIDED) {
623          itmp=lambda[i];          itmp=lambda[i];
624          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
625             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
626             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
627            itmp++;            itmp++;
628             } else {  /* at this point there are no C points */             } else {  /* at this point there are no C points */
# Line 503  void Paso_Preconditioner_AMG_RungeStuebe Line 648  void Paso_Preconditioner_AMG_RungeStuebe
648             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
649                        
650             /* all undecided unknown strongly coupled to i are moved to F */             /* all undecided unknown strongly coupled to i are moved to F */
651             for (p=0; p<degreeT[i]; ++p) {             for (p=0; p<degree_ST[i]; ++p) {
652            j=ST[offset[i]+p];            j=ST[offset_S[i]+p];
653                        
654            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
655                            
656               split_marker[j]=PASO_AMG_IN_F;               split_marker[j]=PASO_AMG_IN_F;
657               lambda[j]=-1;               lambda[j]=-1;
658                            
659               for (q=0; q<degreeT[j]; ++q) {               for (q=0; q<degree_ST[j]; ++q) {
660              k=ST[offset[j]+q];              k=ST[offset_S[j]+q];
661              if (split_marker[k]==PASO_AMG_UNDECIDED) {              if (split_marker[k]==PASO_AMG_UNDECIDED) {
662                 lambda[k]++;                 lambda[k]++;
663                 if (notInPanel[k]) {                 if (notInPanel[k]) {
# Line 528  void Paso_Preconditioner_AMG_RungeStuebe Line 673  void Paso_Preconditioner_AMG_RungeStuebe
673                            
674            }            }
675             }             }
676             for (p=0; p<degree[i]; ++p) {             for (p=0; p<degree_S[i]; ++p) {
677            j=S[offset[i]+p];            j=S[offset_S[i]+p];
678            if (split_marker[j]==PASO_AMG_UNDECIDED) {            if (split_marker[j]==PASO_AMG_UNDECIDED) {
679               lambda[j]--;               lambda[j]--;
680               if (notInPanel[j]) {               if (notInPanel[j]) {
# Line 568  void Paso_Preconditioner_AMG_RungeStuebe Line 713  void Paso_Preconditioner_AMG_RungeStuebe
713          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */          lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */
714                    
715          /* all undecided unknown strongly coupled to i are moved to F */          /* all undecided unknown strongly coupled to i are moved to F */
716          for (p=0; p<degreeT[i]; ++p) {          for (p=0; p<degree_ST[i]; ++p) {
717             j=ST[offset[i]+p];             j=ST[offset_S[i]+p];
718             if (split_marker[j]==PASO_AMG_UNDECIDED) {             if (split_marker[j]==PASO_AMG_UNDECIDED) {
719                    
720            split_marker[j]=PASO_AMG_IN_F;            split_marker[j]=PASO_AMG_IN_F;
721            lambda[j]=-1;            lambda[j]=-1;
722                    
723            for (q=0; q<degreeT[j]; ++q) {            for (q=0; q<degree_ST[j]; ++q) {
724               k=ST[offset[j]+q];               k=ST[offset_S[j]+q];
725               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;               if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
726            }            }
727    
728             }             }
729          }          }
730          for (p=0; p<degree[i]; ++p) {          for (p=0; p<degree_S[i]; ++p) {
731             j=S[offset[i]+p];             j=S[offset_S[i]+p];
732             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
733          }          }
734                    
735       }       }
736       i=Paso_Util_arg_max(n,lambda);       i=Paso_Util_arg_max(n,lambda);
737        }        }
738              
739     }     }
740     TMPMEMFREE(lambda);     TMPMEMFREE(lambda);
741     TMPMEMFREE(ST);     TMPMEMFREE(ST);
742     TMPMEMFREE(degreeT);     TMPMEMFREE(degree_ST);
743     TMPMEMFREE(panel);     TMPMEMFREE(panel);
744     TMPMEMFREE(notInPanel);     TMPMEMFREE(notInPanel);
745  }  }
746    /* ensures that two F nodes are connected via a C node :*/
747    void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
748                            const dim_t* degree_S, const index_t* S,
749                            index_t*split_marker)
750    {
751          dim_t i, p, q;
752    
753          /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
754          for (i=0;i<n;++i) {
755                if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
756               for (p=0; p<degree_S[i]; ++p) {
757                      register index_t j=S[offset_S[i]+p];
758                      if ( (split_marker[j]==PASO_AMG_IN_F)  && (degree_S[j]>0) )  {
759                          /* i and j are now two F nodes which are strongly connected */
760                          /* is there a C node they share ? */
761                          register index_t sharing=-1;
762                      for (q=0; q<degree_S[i]; ++q) {
763                             index_t k=S[offset_S[i]+q];
764                             if (split_marker[k]==PASO_AMG_IN_C) {
765                                register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
766                                if (where_k != NULL) {
767                                   sharing=k;
768                                   break;
769                                }
770                             }
771                          }
772                          if (sharing<0) {
773                               if (i<j) {
774                                  split_marker[j]=PASO_AMG_IN_C;
775                               } else {
776                                  split_marker[i]=PASO_AMG_IN_C;
777                                  break;  /* no point to look any further as i is now a C node */
778                               }
779                          }
780                      }
781                   }
782                }
783           }
784    }
785    #endif
786    
787    #ifdef DFG
788    void Paso_Preconditioner_AMG_CIJPCoarsening( )
789    {
790    
791      
792      const dim_t my_n;
793      const dim_t overlap_n;
794      const dim_t n= my_n + overlap_n;
795    
796    
797          /* initialize  split_marker and split_marker :*/
798          /* those unknows which are not influenced go into F, the rest is available for F or C */
799          #pragma omp parallel for private(i) schedule(static)
800          for (i=0;i<n;++i) {
801         degree_ST[i]=0;
802         if (degree_S[i]>0) {
803            lambda[i]=0;
804            split_marker[i]=PASO_AMG_UNDECIDED;
805         } else {
806            split_marker[i]=PASO_AMG_IN_F;
807            lambda[i]=-1;
808         }
809          }
810    
811       /* set local lambda + overlap */
812       #pragma omp parallel for private(i)
813       for (i=0; i<n ++i) {
814           w[i]=degree_ST[i];
815       }
816       for (i=0; i<my_n; i++) {
817          w2[i]=random;
818       }
819    
820    
821       /* add noise to w */
822       Paso_Coupler_add(n, w, 1., w2, col_coupler);
823    
824       /*  */
825       global_n_C=0;
826       global_n_F=..;
827      
828       while (global_n_C + global_n_F < global_n) {
829    
830          
831          is_in_D[i]=FALSE;
832          /*  test  local connectivit*/
833          /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
834          #pragma omp parallel for private(i)
835          for (i=0; i<n; ++i) w2[i]=0;
836                
837          for (i=0; i<my_n; ++i) {
838             for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
839                 k=S[offset_S[i]+iPtr];
840                 w2[i]=MAX(w2[i],w[k]);
841                 w2[k]=MAX(w2[k],w[i]);
842             }
843          }
844          /* adjust overlaps by MAX */
845          Paso_Coupler_max(n, w2, col_coupler);
846    
847          /* points with w[i]>w2[i] become C nodes */
848       }
849      
850    }
851    #endif

Legend:
Removed from v.3352  
changed lines
  Added in v.3450

  ViewVC Help
Powered by ViewVC 1.1.26