/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3485 by gross, Thu Mar 24 22:44:40 2011 UTC revision 4286 by caltinay, Thu Mar 7 04:28:11 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2010 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  /**************************************************************/  /************************************************************************************/
18    
19  /* Paso: AMG preconditioner  (local version)                  */  /* Paso: AMG preconditioner  (local version)                  */
20    
21  /**************************************************************/  /************************************************************************************/
22    
23  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                                */  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                 */
24    
25  /**************************************************************/  /************************************************************************************/
26    
27  #define SHOW_TIMING FALSE  #define SHOW_TIMING FALSE
28    #define MY_DEBUG 0
29    #define MY_DEBUG1 1
30    
31  #include "Paso.h"  #include "Paso.h"
32  #include "Preconditioner.h"  #include "Preconditioner.h"
# Line 33  Line 37 
37  #include<stdio.h>  #include<stdio.h>
38    
39    
40  /**************************************************************/  /************************************************************************************/
41    
42  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
43    
# Line 47  void Paso_Preconditioner_AMG_free(Paso_P Line 51  void Paso_Preconditioner_AMG_free(Paso_P
51      MEMFREE(in->r);      MEMFREE(in->r);
52      MEMFREE(in->x_C);      MEMFREE(in->x_C);
53      MEMFREE(in->b_C);      MEMFREE(in->b_C);
54            Paso_MergedSolver_free(in->merged_solver);
55    
56      MEMFREE(in);      MEMFREE(in);
57       }       }
# Line 81  dim_t Paso_Preconditioner_AMG_getNumCoar Line 86  dim_t Paso_Preconditioner_AMG_getNumCoar
86       return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);       return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
87     }     }
88  }  }
89  /*****************************************************************  /***************************************************************************************
90    
91     constructs AMG     constructs AMG
92        
93  ******************************************************************/  ****************************************************************************************/
94  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
95    
96    Paso_Preconditioner_AMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
97      Paso_SystemMatrix *A_C=NULL;
98    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
99    
100    const dim_t my_n=A_p->mainBlock->numRows;    const dim_t my_n=A_p->mainBlock->numRows;
# Line 97  Paso_Preconditioner_AMG* Paso_Preconditi Line 103  Paso_Preconditioner_AMG* Paso_Preconditi
103    const dim_t n = my_n + overlap_n;    const dim_t n = my_n + overlap_n;
104    
105    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
106    index_t* F_marker=NULL, *counter=NULL;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
107    dim_t i;    dim_t i, my_n_F, my_n_C, n_C, F_flag, *F_set=NULL, global_n_C=0, global_n_F=0, n_F;
108    double time0=0;    double time0=0;
109    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
110    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
111    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
112    const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);    const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);
113    
114    
     
115    /*    /*
116        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening?
117                
118    */    */
119    if ( (sparsity >= options->min_coarse_sparsity) ||    if ( (sparsity >= options->min_coarse_sparsity) ||
120         (total_n <= options->min_coarse_matrix_size) ||         (global_n <= options->min_coarse_matrix_size) ||
121         (level > options->level_max) ) {         (level > options->level_max) ) {
122    
123          if (verbose) {          if (verbose) {
# Line 124  Paso_Preconditioner_AMG* Paso_Preconditi Line 130  Paso_Preconditioner_AMG* Paso_Preconditi
130            printf("Paso_Preconditioner: AMG: termination of coarsening by ");            printf("Paso_Preconditioner: AMG: termination of coarsening by ");
131    
132            if (sparsity >= options->min_coarse_sparsity)            if (sparsity >= options->min_coarse_sparsity)
133                printf("SPAR ");                printf("SPAR");
134    
135            if (total_n <= options->min_coarse_matrix_size)            if (global_n <= options->min_coarse_matrix_size)
136                printf("SIZE ");                printf("SIZE");
137    
138            if (level > options->level_max)            if (level > options->level_max)
139                printf("LEVEL ");                printf("LEVEL");
140    
141            printf("\n");            printf("\n");
142    
143          printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",          printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
144             level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);               level,  options->level_max, sparsity, options->min_coarse_sparsity, global_n, options->min_coarse_matrix_size);  
145    
146         }         }
147    
# Line 144  Paso_Preconditioner_AMG* Paso_Preconditi Line 150  Paso_Preconditioner_AMG* Paso_Preconditi
150       /* Start Coarsening : */       /* Start Coarsening : */
151            
152       /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */       /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
153       const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;       const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len  + A_p->row_coupleBlock->numRows * A_p->col_coupleBlock->numCols;
154    
155       dim_t* degree_S=TMPMEMALLOC(n, dim_t);       dim_t* degree_S=TMPMEMALLOC(n, dim_t);
156       index_t *offset_S=TMPMEMALLOC(n, index_t);       index_t *offset_S=TMPMEMALLOC(n, index_t);
# Line 163  Paso_Preconditioner_AMG* Paso_Preconditi Line 169  Paso_Preconditioner_AMG* Paso_Preconditi
169             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
170      */      */
171          Paso_SystemMatrix_copyColCoupleBlock(A_p);          Paso_SystemMatrix_copyColCoupleBlock(A_p);
172          /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */          Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE);
173    
174      /*      /*
175            set splitting of unknows:            set splitting of unknowns:
176                    
177           */           */
178       time0=Esys_timer();       time0=Esys_timer();
# Line 176  Paso_Preconditioner_AMG* Paso_Preconditi Line 182  Paso_Preconditioner_AMG* Paso_Preconditi
182             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
183       }       }
184       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
185        /*   Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
186  {   */
187     dim_t p;  
    for (i=0; i<n; ++i) {  
          printf("%d: ",i);  
         for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);  
         printf("\n");  
    }  
 }  
188       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189                              degree_S, offset_S, S, degree_ST, offset_ST, ST,                              degree_S, offset_S, S, degree_ST, offset_ST, ST,
190                          A_p->col_coupler->connector,A_p->col_distribution);                          A_p->col_coupler->connector,A_p->col_distribution);
 Esys_setError(SYSTEM_ERROR, "AMG:DONE.");  
 return NULL;  
191                
192    
193  /*MPI:           /* in BoomerAMG if interpolation is used FF connectivity is required */
      Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);  
 */  
       
          /* in BoomerAMG interpolation is used FF connectiovity is required :*/  
194  /*MPI:  /*MPI:
195           if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)           if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
196                               Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
197  */  */
198    
199       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
   
 #ifdef AAAAA  
200       if (Esys_noError() ) {       if (Esys_noError() ) {
201          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
202          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
203            
204          /*          /*
205             count number of unkowns to be eliminated:             count number of unknowns to be eliminated:
206          */          */
207          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);          my_n_F=Paso_Util_cumsum_maskedTrue(my_n,counter, F_marker);
208          n_C=n-n_F;          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
209          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);              /* collect my_n_F values on all processes, a direct solver should
210                        be used if any my_n_F value is 0 */
211          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */              F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
212            #ifdef ESYS_MPI
213                MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
214            #endif
215                global_n_F=0;
216                F_flag = 1;
217                for (i=0; i<A_p->mpi_info->size; i++) {
218                    global_n_F+=F_set[i];
219                    if (F_set[i] == 0) F_flag = 0;
220                }
221                TMPMEMFREE(F_set);
222    
223            my_n_C=my_n-my_n_F;
224                global_n_C=global_n-global_n_F;
225            if (verbose) printf("Paso_Preconditioner: AMG (non-local) level %d: %d unknowns are flagged for elimination. %d left.\n",level,global_n_F,global_n_C);
226    
227            
228    /*          if ( n_F == 0 ) {  is a nasty case. a direct solver should be used, return NULL */
229                if (F_flag == 0) {
230             out = NULL;             out = NULL;
231          } else {          } else {
232             out=MEMALLOC(1,Paso_Preconditioner_AMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
233             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
234            out->level = level;            out->level = level;
           out->n = n;  
           out->n_F = n_F;  
           out->n_block = n_block;  
235            out->A_C = NULL;            out->A_C = NULL;
236            out->P = NULL;              out->P = NULL;  
237            out->R = NULL;                      out->R = NULL;          
# Line 235  return NULL; Line 242  return NULL;
242            out->b_C = NULL;            out->b_C = NULL;
243            out->AMG_C = NULL;            out->AMG_C = NULL;
244            out->Smoother=NULL;            out->Smoother=NULL;
245                      out->merged_solver=NULL;
246             }             }
247             mask_C=TMPMEMALLOC(n,index_t);             mask_C=TMPMEMALLOC(n,index_t);
248             rows_in_F=TMPMEMALLOC(n_F,index_t);             rows_in_F=TMPMEMALLOC(n_F,index_t);
# Line 242  return NULL; Line 250  return NULL;
250             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
251             if ( Esys_noError() ) {             if ( Esys_noError() ) {
252    
253            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), 0, verbose);
254                
255            if (n_C != 0) {            if (global_n_C != 0) {
256                 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */              /*  create mask of C nodes with value >-1, gives new id */
257                n_C=Paso_Util_cumsum_maskedFalse(n, mask_C, F_marker);
258                /* if nothing has been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
259        
260              /* allocate helpers :*/              /* allocate helpers :*/
261              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*my_n_C,double);
262              out->b_C=MEMALLOC(n_block*n_C,double);              out->b_C=MEMALLOC(n_block*my_n_C,double);
263              out->r=MEMALLOC(n_block*n,double);              out->r=MEMALLOC(n_block*my_n,double);
264                            
265              Esys_checkPtr(out->r);              Esys_checkPtr(out->r);
266              Esys_checkPtr(out->x_C);              Esys_checkPtr(out->x_C);
# Line 265  return NULL; Line 275  return NULL;
275                   if  (F_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
276                    }                    }
277                 }                 }
                /*  create mask of C nodes with value >-1 gives new id */  
                i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);  
   
                #pragma omp parallel for private(i) schedule(static)  
                for (i = 0; i < n; ++i) {  
                   if  (F_marker[i]) {  
                  mask_C[i]=-1;  
                   } else {  
                  mask_C[i]=counter[i];;  
                   }  
                }  
278                 /*                 /*
279                    get Prolongation :                        get Prolongation :    
280                 */                                   */                  
281    
282                 time0=Esys_timer();                 time0=Esys_timer();
283  /*MPI:  
284                 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);                 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,offset_S, degree_S,S,n_C,mask_C, options->interpolation_method);
285  */  
                if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);  
286              }              }
287    
288              /*                    /*      
289                 construct Restriction operator as transposed of Prolongation operator:                 construct Restriction operator as transposed of Prolongation operator:
290              */              */
291    
292              if ( Esys_noError()) {              if ( Esys_noError()) {
293                 time0=Esys_timer();                 time0=Esys_timer();
294  /*MPI:  
295                 out->R=Paso_SystemMatrix_getTranspose(out->P);                 out->R=Paso_Preconditioner_AMG_getRestriction(out->P);
296  */  
297                 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
298              }                    }      
299              /*              /*
300              construct coarse level matrix:                          construct coarse level matrix:
301              */                          */
302              if ( Esys_noError()) {                          if ( Esys_noError()) {
303                 time0=Esys_timer();                             time0=Esys_timer();
 /*MPI:  
                Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);  
                A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);  
                Paso_Preconditioner_AMG_setStrongConnections  
                Paso_SystemMatrix_free(Atemp);  
 */  
304    
305                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                                         A_C = Paso_Preconditioner_AMG_buildInterpolationOperator(A_p, out->P, out->R);
306              }  
307                               if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
308                            }
309    
               
310              /*              /*
311                 constructe courser level:                 construct coarser level:
312                                
313              */              */
314              if ( Esys_noError()) {              if ( Esys_noError()) {
315                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
316              }              }
317    
318              if ( Esys_noError()) {              if ( Esys_noError()) {
319                 if ( out->AMG_C == NULL ) {                out->A_C=A_C;
320                    out->reordering = options->reordering;                if ( out->AMG_C == NULL ) {
321                    out->refinements = options->coarse_matrix_refinements;                    /* merge the system matrix into 1 rank when
322                    /* no coarse level matrix has been constructed. use direct solver */                   it's not suitable coarsening due to the
323                    #ifdef MKL                   total number of unknowns are too small */
324                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                                out->merged_solver= Paso_MergedSolver_alloc(A_C, options);
325                      Paso_SystemMatrix_free(A_C);                }
                     out->A_C->solver_package = PASO_MKL;  
                     if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                   #else  
                     #ifdef UMFPACK  
                        out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);  
                        Paso_SystemMatrix_free(A_C);  
                        out->A_C->solver_package = PASO_UMFPACK;  
                        if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                     #else  
                        out->A_C=A_C;  
                        out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);  
                        out->A_C->solver_package = PASO_SMOOTHER;  
                        if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                     #endif  
                   #endif  
                } else {  
                   /* finally we set some helpers for the solver step */  
                   out->A_C=A_C;  
                }  
326              }                      }        
327            }            }
328             }             }
# Line 352  return NULL; Line 330  return NULL;
330             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
331          }          }
332       }       }
 #endif  
333    
334    }    }
335    TMPMEMFREE(counter);    TMPMEMFREE(counter);
# Line 376  return NULL; Line 353  return NULL;
353    
354    
355  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
356       const dim_t n = amg->n * amg->n_block;       const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
357       double time0=0;       double time0=0;
358       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
359       const dim_t pre_sweeps=amg->pre_sweeps;       const dim_t pre_sweeps=amg->pre_sweeps;
# Line 384  void Paso_Preconditioner_AMG_solve(Paso_ Line 361  void Paso_Preconditioner_AMG_solve(Paso_
361       /* presmoothing */       /* presmoothing */
362       time0=Esys_timer();       time0=Esys_timer();
363       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
364    
365       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
366       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
367       /* end of presmoothing */       /* end of presmoothing */
368            
369       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       time0=Esys_timer();
370           time0=Esys_timer();  
371       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
372       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
373       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
374           time0=Esys_timer()-time0;  
375       /* coarse level solve */       time0=Esys_timer()-time0;
376       if ( amg->AMG_C == NULL) {       /* coarse level solve */
377         if ( amg->AMG_C == NULL) {
378          time0=Esys_timer();          time0=Esys_timer();
379          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
380  #ifdef FIXME              Paso_MergedSolver_solve(amg->merged_solver,amg->x_C, amg->b_C);
         switch (amg->A_C->solver_package) {  
            case (PASO_MKL):  
           Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_UMFPACK):  
           Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_SMOOTHER):  
           Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);  
           break;  
         }  
 #endif  
         Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);  
381          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
382       } else {       } else {
383          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
      }    
      time0=time0+Esys_timer();  
      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */      
       
          /*postsmoothing*/  
         
         /*solve Ax=b with initial guess x */  
         time0=Esys_timer();  
         Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);  
         time0=Esys_timer()-time0;  
         if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);  
         /*end of postsmoothing*/  
       
384       }       }
385       return;  
386        time0=time0+Esys_timer();
387        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
388    
389        /*postsmoothing*/
390          
391        /*solve Ax=b with initial guess x */
392        time0=Esys_timer();
393        Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
394        time0=Esys_timer()-time0;
395        if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
396        return;
397  }  }
398    
399  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 451  void Paso_Preconditioner_AMG_setStrongCo Line 415  void Paso_Preconditioner_AMG_setStrongCo
415     index_t iptr, i;     index_t iptr, i;
416     double *threshold_p=NULL;     double *threshold_p=NULL;
417    
   
418     threshold_p = TMPMEMALLOC(2*my_n, double);     threshold_p = TMPMEMALLOC(2*my_n, double);
419        
420     #pragma omp parallel for private(i,iptr) schedule(static)     #pragma omp parallel for private(i,iptr) schedule(static)
# Line 461  void Paso_Preconditioner_AMG_setStrongCo Line 424  void Paso_Preconditioner_AMG_setStrongCo
424       register double sum_row=0;       register double sum_row=0;
425       register double main_row=0;       register double main_row=0;
426       register dim_t kdeg=0;       register dim_t kdeg=0;
427           const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];           register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
428    
429                    
430       /* collect information for row i: */       /* collect information for row i: */
431       #pragma ivdep       #pragma ivdep
432       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
433          register index_t j=A->mainBlock->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
434          register double fnorm=ABS(A->mainBlock->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
           
435          if( j != i) {          if( j != i) {
436             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
437             sum_row+=fnorm;             sum_row+=fnorm;
# Line 477  void Paso_Preconditioner_AMG_setStrongCo Line 440  void Paso_Preconditioner_AMG_setStrongCo
440          }          }
441    
442       }       }
443        
444       #pragma ivdep       #pragma ivdep
445       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
446          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
447    
448          max_offdiagonal = MAX(max_offdiagonal,fnorm);          max_offdiagonal = MAX(max_offdiagonal,fnorm);
449          sum_row+=fnorm;          sum_row+=fnorm;
450       }       }
# Line 489  void Paso_Preconditioner_AMG_setStrongCo Line 453  void Paso_Preconditioner_AMG_setStrongCo
453           {           {
454          const double threshold = theta*max_offdiagonal;          const double threshold = theta*max_offdiagonal;
455              threshold_p[2*i+1]=threshold;              threshold_p[2*i+1]=threshold;
456          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal dominance */
457                 threshold_p[2*i]=1;                 threshold_p[2*i]=1;
458             #pragma ivdep             #pragma ivdep
459             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
# Line 514  void Paso_Preconditioner_AMG_setStrongCo Line 478  void Paso_Preconditioner_AMG_setStrongCo
478           offset_S[i]=koffset;           offset_S[i]=koffset;
479       degree_S[i]=kdeg;       degree_S[i]=kdeg;
480        }        }
481    
482        /* now we need to distribute the threshold and the diagonal dominance indicator */        /* now we need to distribute the threshold and the diagonal dominance indicator */
483        if (A->mpi_info->size > 1) {        if (A->mpi_info->size > 1) {
484    
# Line 529  void Paso_Preconditioner_AMG_setStrongCo Line 494  void Paso_Preconditioner_AMG_setStrongCo
494    
495            #pragma omp parallel for private(i,iptr) schedule(static)            #pragma omp parallel for private(i,iptr) schedule(static)
496            for (i=0; i<overlap_n; i++) {            for (i=0; i<overlap_n; i++) {
           
497            const double threshold = remote_threshold[2*i+1];            const double threshold = remote_threshold[2*i+1];
498            register dim_t kdeg=0;            register dim_t kdeg=0;
499                const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];                register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
500                if (remote_threshold[2*i]>0) {                if (remote_threshold[2*i]>0) {
501               #pragma ivdep          #pragma ivdep
502               for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
503                register index_t j=A->row_coupleBlock->pattern->index[iptr];                register index_t j=A->row_coupleBlock->pattern->index[iptr];
504                if(ABS(A->row_coupleBlock->val[iptr])>threshold) {                if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
505               S[koffset+kdeg] = j ;               S[koffset+kdeg] = j ;
506               kdeg++;               kdeg++;
507                }                }
508             }          }
509    
510            #pragma ivdep
511            for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
512              register index_t j=A->remote_coupleBlock->pattern->index[iptr];
513              if(ABS(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
514                  S[koffset+kdeg] = j + my_n;
515                  kdeg++;
516              }
517            }
518                }                }
519                offset_S[i+my_n]=koffset;                offset_S[i+my_n]=koffset;
520            degree_S[i+my_n]=kdeg;            degree_S[i+my_n]=kdeg;
# Line 564  void Paso_Preconditioner_AMG_setStrongCo Line 536  void Paso_Preconditioner_AMG_setStrongCo
536                              const double theta, const double tau)                              const double theta, const double tau)
537    
538  {  {
539     const dim_t n_block=A->block_size;     const dim_t block_size=A->block_size;
540     const dim_t my_n=A->mainBlock->numRows;     const dim_t my_n=A->mainBlock->numRows;
541     const dim_t overlap_n=A->row_coupleBlock->numRows;     const dim_t overlap_n=A->row_coupleBlock->numRows;
542        
# Line 573  void Paso_Preconditioner_AMG_setStrongCo Line 545  void Paso_Preconditioner_AMG_setStrongCo
545        
546        
547     threshold_p = TMPMEMALLOC(2*my_n, double);     threshold_p = TMPMEMALLOC(2*my_n, double);
548      
549     #pragma omp parallel private(i,iptr, bi)     #pragma omp parallel private(i,iptr,bi)
550     {     {
551        
552        dim_t max_deg=0;        dim_t max_deg=0;
# Line 593  void Paso_Preconditioner_AMG_setStrongCo Line 565  void Paso_Preconditioner_AMG_setStrongCo
565       register double main_row=0;       register double main_row=0;
566       register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];       register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
567       register dim_t kdeg=0;       register dim_t kdeg=0;
568       const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];       register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
569            
570       /* collect information for row i: */       /* collect information for row i: */
571       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
572          register index_t j=A->mainBlock->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
573          register double fnorm=0;          register double fnorm=0;
574          #pragma ivdep          #pragma ivdep
575          for(bi=0;bi<n_block;++bi) {          for(bi=0;bi<block_size;++bi) {
576              register double  rtmp2= A->mainBlock->val[iptr*n_block+bi];              register double  rtmp2= A->mainBlock->val[iptr*block_size+bi];
577             fnorm+=rtmp2*rtmp2;             fnorm+=rtmp2*rtmp2;
578          }          }
579          fnorm=sqrt(fnorm);          fnorm=sqrt(fnorm);
# Line 620  void Paso_Preconditioner_AMG_setStrongCo Line 592  void Paso_Preconditioner_AMG_setStrongCo
592       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
593          register double fnorm=0;          register double fnorm=0;
594          #pragma ivdep          #pragma ivdep
595          for(bi=0;bi<n_block;++bi) {          for(bi=0;bi<block_size;++bi) {
596             register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];             register double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
597             fnorm+=rtmp2*rtmp2;             fnorm+=rtmp2*rtmp2;
598          }          }
599          fnorm=sqrt(fnorm);          fnorm=sqrt(fnorm);
# Line 638  void Paso_Preconditioner_AMG_setStrongCo Line 610  void Paso_Preconditioner_AMG_setStrongCo
610          rtmp_offset=-A->mainBlock->pattern->ptr[i];          rtmp_offset=-A->mainBlock->pattern->ptr[i];
611                    
612          threshold_p[2*i+1]=threshold;          threshold_p[2*i+1]=threshold;
613          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal dominance */
614             threshold_p[2*i]=1;             threshold_p[2*i]=1;
            rtmp_offset=-A->mainBlock->pattern->ptr[i];  
615             #pragma ivdep             #pragma ivdep
616             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
617            register index_t j=A->mainBlock->pattern->index[iptr];            register index_t j=A->mainBlock->pattern->index[iptr];
# Line 685  void Paso_Preconditioner_AMG_setStrongCo Line 656  void Paso_Preconditioner_AMG_setStrongCo
656            
657       const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];       const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
658       register dim_t kdeg=0;       register dim_t kdeg=0;
659       const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];       register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
660       if (remote_threshold[2*i]>0) {       if (remote_threshold[2*i]>0) {
661          #pragma ivdep          #pragma ivdep
662          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
663             register index_t j=A->row_coupleBlock->pattern->index[iptr];             register index_t j=A->row_coupleBlock->pattern->index[iptr];
664             register double fnorm2=0;             register double fnorm2=0;
665             #pragma ivdepremote_threshold[2*i]             #pragma ivdepremote_threshold[2*i]
666             for(bi=0;bi<n_block;++bi) {             for(bi=0;bi<block_size;++bi) {
667            register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];            register double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
668            fnorm2+=rtmp2*rtmp2;            fnorm2+=rtmp2*rtmp2;
669             }             }
670                        
# Line 702  void Paso_Preconditioner_AMG_setStrongCo Line 673  void Paso_Preconditioner_AMG_setStrongCo
673            kdeg++;            kdeg++;
674             }             }
675          }          }
676    
677            #pragma ivdep
678                for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
679                   register index_t j=A->remote_coupleBlock->pattern->index[iptr];
680                   register double fnorm2=0;
681                   #pragma ivdepremote_threshold[2*i]
682                   for(bi=0;bi<block_size;++bi) {
683                      register double rtmp2 = A->remote_coupleBlock->val[iptr*block_size+bi];
684                      fnorm2+=rtmp2*rtmp2;
685                   }
686                   if(fnorm2 > threshold2 && i != j) {
687                      S[koffset+kdeg] = j + my_n;
688                      kdeg++;
689                   }
690                }
691                    
692       }       }
693       offset_S[i+my_n]=koffset;       offset_S[i+my_n]=koffset;
# Line 711  void Paso_Preconditioner_AMG_setStrongCo Line 697  void Paso_Preconditioner_AMG_setStrongCo
697     }     }
698     TMPMEMFREE(threshold_p);     TMPMEMFREE(threshold_p);
699  }  }
700    
701  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
702                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
703  {  {
# Line 738  void Paso_Preconditioner_AMG_transposeSt Line 725  void Paso_Preconditioner_AMG_transposeSt
725     }     }
726  }  }
727    
728    int compareindex(const void *a, const void *b)
729    {
730      return (*(int *)a - *(int *)b);
731    }
732    
733  void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,  void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
734                          const dim_t* degree_S, const index_t* offset_S, const index_t* S,                          const dim_t* degree_S, const index_t* offset_S, const index_t* S,
735                          const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,                          const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
# Line 754  void Paso_Preconditioner_AMG_CIJPCoarsen Line 746  void Paso_Preconditioner_AMG_CIJPCoarsen
746    Status=TMPMEMALLOC(n, double);    Status=TMPMEMALLOC(n, double);
747    random = Paso_Distribution_createRandomVector(col_dist,1);    random = Paso_Distribution_createRandomVector(col_dist,1);
748    ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);    ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
749      
750    #pragma omp parallel for private(i)    #pragma omp parallel for private(i)
751    for (i=0; i< my_n; ++i) {    for (i=0; i< my_n; ++i) {
752        w[i]=degree_ST[i]+random[i];        w[i]=degree_ST[i]+random[i];
# Line 764  void Paso_Preconditioner_AMG_CIJPCoarsen Line 756  void Paso_Preconditioner_AMG_CIJPCoarsen
756         Status[i]=1; /* status undefined */         Status[i]=1; /* status undefined */
757        }        }
758    }    }
759    
760    #pragma omp parallel for private(i, iptr)    #pragma omp parallel for private(i, iptr)
761    for (i=0; i< n; ++i) {    for (i=0; i< n; ++i) {
762        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {
# Line 773  void Paso_Preconditioner_AMG_CIJPCoarsen Line 766  void Paso_Preconditioner_AMG_CIJPCoarsen
766    
767        
768    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
769    printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);    /* printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);  */
   
     
770    iter=0;    iter=0;
771    while (numUndefined > 0) {    while (numUndefined > 0) {
772       Paso_Coupler_fillOverlap(n, w, w_coupler);       Paso_Coupler_fillOverlap(n, w, w_coupler);
773       {  
774      int p;        /* calculate the maximum value of neighbours following active strong connections:
775      for (p=0; p<my_n; ++p) {          w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection is still active  */      
        printf(" %d : %f %f \n",p, w[p], Status[p]);  
     }  
     for (p=my_n; p<n; ++p) {  
        printf(" %d : %f \n",p, w[p]);  
     }  
       
       
      }  
       
       /* calculate the maximum value of naigbours following active strong connections:  
         w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active  */        
776        #pragma omp parallel for private(i, iptr)        #pragma omp parallel for private(i, iptr)
777        for (i=0; i<my_n; ++i) {        for (i=0; i<my_n; ++i) {
778       if (Status[i]>0) { /* status is still undefined */       if (Status[i]>0) { /* status is still undefined */
# Line 800  void Paso_Preconditioner_AMG_CIJPCoarsen Line 780  void Paso_Preconditioner_AMG_CIJPCoarsen
780          register bool_t inD=TRUE;          register bool_t inD=TRUE;
781          const double wi=w[i];          const double wi=w[i];
782    
   
783          for( iptr =0 ; iptr < degree_S[i]; ++iptr) {          for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
   
784             const index_t k=S[offset_S[i]+iptr];             const index_t k=S[offset_S[i]+iptr];
785             const index_t* start_p = &ST[offset_ST[k]];             const index_t* start_p = &ST[offset_ST[k]];
786             const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);             const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
787    
788             if (ST_flag[(index_t)(where_p-start_p)]>0) {             if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
 printf("S: %d (%e) -> %d    (%e)\n",i, wi, k, w[k]);        
789            if (wi <= w[k] ) {            if (wi <= w[k] ) {
790               inD=FALSE;               inD=FALSE;
791               break;               break;
# Line 821  printf("S: %d (%e) -> %d   (%e)\n",i, wi, Line 798  printf("S: %d (%e) -> %d   (%e)\n",i, wi,
798            for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {            for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
799               const index_t k=ST[offset_ST[i]+iptr];               const index_t k=ST[offset_ST[i]+iptr];
800               if ( ST_flag[offset_ST[i]+iptr] > 0 ) {               if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
 printf("ST: %d (%e) -> %d   (%e)\n",i, wi, k, w[k]);        
               
801                         if (wi <= w[k] ) {                         if (wi <= w[k] ) {
802                 inD=FALSE;                 inD=FALSE;
803                 break;                 break;
# Line 832  printf("ST: %d (%e) -> %d  (%e)\n",i, wi, Line 807  printf("ST: %d (%e) -> %d  (%e)\n",i, wi,
807          }              }    
808          if (inD) {          if (inD) {
809             Status[i]=0.; /* is in D */             Status[i]=0.; /* is in D */
 printf("%d is in D\n",i);  
810          }          }
811       }       }
812            
# Line 864  printf("%d is in D\n",i); Line 838  printf("%d is in D\n",i);
838            const index_t j=ST[offset_ST[i]+jptr];            const index_t j=ST[offset_ST[i]+jptr];
839            if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {            if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
840               w[i]--;               w[i]--;
 printf("%d reduced by %d\n",i,j);  
841               ST_flag[offset_ST[i]+jptr]=-1;               ST_flag[offset_ST[i]+jptr]=-1;
842            }            }
843             }             }
# Line 886  printf("%d reduced by %d\n",i,j); Line 859  printf("%d reduced by %d\n",i,j);
859    
860               for (jptr=0; jptr<degree_ST[i]; ++jptr) {               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
861              const index_t j=ST[offset_ST[i]+jptr];              const index_t j=ST[offset_ST[i]+jptr];
 printf("check connection: %d %d\n",i,j);  
862              ST_flag[offset_ST[i]+jptr]=-1;              ST_flag[offset_ST[i]+jptr]=-1;
863              for (kptr=0; kptr<degree_ST[j]; ++kptr) {              for (kptr=0; kptr<degree_ST[j]; ++kptr) {
864                 const index_t k=ST[offset_ST[j]+kptr];                 const index_t k=ST[offset_ST[j]+kptr];
 printf("check connection: %d of %d\n",k,j);  
865                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
 printf("found!\n");  
866                    if (ST_flag[offset_ST[j]+kptr] >0) {                    if (ST_flag[offset_ST[j]+kptr] >0) {
867                   if (j< my_n ) {                   if (j< my_n ) {
868                      w[j]--;                      w[j]--;
 printf("%d reduced by %d and %d \n",j, i,k);  
869                   }                   }
870                   ST_flag[offset_ST[j]+kptr]=-1;                   ST_flag[offset_ST[j]+kptr]=-1;
871                    }                    }
# Line 906  printf("%d reduced by %d and %d \n",j, i Line 875  printf("%d reduced by %d and %d \n",j, i
875            }            }
876          }          }
877       }       }
878    
879       /* adjust status */       /* adjust status */
880       #pragma omp parallel for private(i)       #pragma omp parallel for private(i)
881       for (i=0; i< my_n; ++i) {       for (i=0; i< my_n; ++i) {
882          if ( Status[i] == 0. ) {          if ( Status[i] == 0. ) {
883             Status[i] = -10;   /* this is now a C point */             Status[i] = -10;   /* this is now a C point */
884          } else if ( w[i]<1.) {          } else if (Status[i] == 1. && w[i]<1.) {
885             Status[i] = -100;   /* this is now a F point */               Status[i] = -100;   /* this is now a F point */  
886          }          }
887       }       }
888            
889             i = numUndefined;
890       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
891         if (numUndefined == i) {
892           Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
893           return;
894         }
895    
896       iter++;       iter++;
897       printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);       /* printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined); */
   } /* end of while loop */  
898    
899      } /* end of while loop */
900    
901    /* map to output :*/    /* map to output :*/
902    Paso_Coupler_fillOverlap(n, Status, w_coupler);    Paso_Coupler_fillOverlap(n, Status, w_coupler);
# Line 933  printf("%d reduced by %d and %d \n",j, i Line 908  printf("%d reduced by %d and %d \n",j, i
908          split_marker[i]=PASO_AMG_IN_F;          split_marker[i]=PASO_AMG_IN_F;
909       }       }
910    }    }
   
911    /* clean up : */    /* clean up : */
912    Paso_Coupler_free(w_coupler);    Paso_Coupler_free(w_coupler);
913    TMPMEMFREE(random);    TMPMEMFREE(random);
# Line 943  printf("%d reduced by %d and %d \n",j, i Line 917  printf("%d reduced by %d and %d \n",j, i
917        
918    return;    return;
919  }  }
920    

Legend:
Removed from v.3485  
changed lines
  Added in v.4286

  ViewVC Help
Powered by ViewVC 1.1.26